[lld] 6f63216 - [lld-macho] Extend SyntheticSections to cover all segment load commands

Shoaib Meenai via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 27 12:58:27 PDT 2020


Author: Jez Ng
Date: 2020-04-27T12:58:12-07:00
New Revision: 6f63216c3d6450c885e40b6a67e46563ce3d9746

URL: https://github.com/llvm/llvm-project/commit/6f63216c3d6450c885e40b6a67e46563ce3d9746
DIFF: https://github.com/llvm/llvm-project/commit/6f63216c3d6450c885e40b6a67e46563ce3d9746.diff

LOG: [lld-macho] Extend SyntheticSections to cover all segment load commands

Previously, the special segments `__PAGEZERO` and `__LINKEDIT` were
implemented as special LoadCommands. This diff implements them using
special sections instead which have an `isHidden()` attribute. We do not
emit section headers for hidden sections, but we use their addresses and
file offsets to determine that of their containing segments. In addition
to allowing us to share more segment-related code, this refactor is also
important for the next step of emitting dylibs:

1) dylibs don't have segments like __PAGEZERO, so we need an easy way of
   omitting them w/o messing up segment indices
2) Unlike the kernel, which is happy to run an executable with
   out-of-order segments, dyld requires dylibs to have their segment
   load commands arranged in increasing address order. The refactor
   makes it easier to implement sorting of sections and segments.

Differential Revision: https://reviews.llvm.org/D76839

Added: 
    

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/OutputSegment.cpp
    lld/MachO/OutputSegment.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    lld/MachO/Writer.h
    lld/test/MachO/segments.s

Removed: 
    lld/test/MachO/text-segment.s


################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 2dd58dc02094..bff12009df03 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -139,10 +139,6 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
     return !errorCount();
   }
 
-  getOrCreateOutputSegment("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE);
-  getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE);
-  getOrCreateOutputSegment("__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE);
-
   for (opt::Arg *arg : args) {
     switch (arg->getOption().getID()) {
     case OPT_INPUT:
@@ -167,14 +163,6 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
     for (InputSection *sec : file->sections)
       inputSections.push_back(sec);
 
-  // Add input sections to output segments.
-  for (InputSection *isec : inputSections) {
-    OutputSegment *os =
-        getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE);
-    isec->parent = os;
-    os->sections[isec->name].push_back(isec);
-  }
-
   // Write to an output file.
   writeResult();
 

diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 4974d81c9ac7..76cf8747d28c 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputSection.h"
+#include "OutputSegment.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -20,6 +21,10 @@ using namespace lld::macho;
 
 std::vector<InputSection *> macho::inputSections;
 
+uint64_t InputSection::getFileOffset() const {
+  return parent->fileOff + addr - parent->firstSection()->addr;
+}
+
 void InputSection::writeTo(uint8_t *buf) {
   memcpy(buf, data.data(), data.size());
 

diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index fea88ea1d2be..30d51537a0e2 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -32,8 +32,14 @@ struct Reloc {
 class InputSection {
 public:
   virtual ~InputSection() = default;
-  virtual void writeTo(uint8_t *buf);
   virtual size_t getSize() const { return data.size(); }
+  virtual uint64_t getFileSize() const { return getSize(); }
+  uint64_t getFileOffset() const;
+  // Don't emit section_64 headers for hidden sections.
+  virtual bool isHidden() const { return false; }
+  // Unneeded sections are omitted entirely (header and body).
+  virtual bool isNeeded() const { return true; }
+  virtual void writeTo(uint8_t *buf);
 
   InputFile *file = nullptr;
   OutputSegment *parent = nullptr;
@@ -41,8 +47,12 @@ class InputSection {
   StringRef segname;
 
   ArrayRef<uint8_t> data;
+
+  // TODO these properties ought to live in an OutputSection class.
+  // Move them once available.
   uint64_t addr = 0;
   uint32_t align = 1;
+  uint32_t sectionIndex = 0;
   uint32_t flags = 0;
 
   std::vector<Reloc> relocs;

diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index 75f5c20146b6..c9a87189c85e 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -7,24 +7,58 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSegment.h"
+#include "InputSection.h"
+
 #include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
 
 using namespace llvm;
+using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
+static uint32_t initProt(StringRef name) {
+  if (name == segment_names::text)
+    return VM_PROT_READ | VM_PROT_EXECUTE;
+  if (name == segment_names::pageZero)
+    return 0;
+  if (name == segment_names::linkEdit)
+    return VM_PROT_READ;
+  return VM_PROT_READ | VM_PROT_WRITE;
+}
+
+static uint32_t maxProt(StringRef name) {
+  if (name == segment_names::pageZero)
+    return 0;
+  return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
+}
+
+void OutputSegment::addSection(InputSection *isec) {
+  isec->parent = this;
+  std::vector<InputSection *> &vec = sections[isec->name];
+  if (vec.empty() && !isec->isHidden()) {
+    ++numNonHiddenSections;
+  }
+  vec.push_back(isec);
+}
+
+static llvm::DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
 std::vector<OutputSegment *> macho::outputSegments;
 
-OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) {
-  for (OutputSegment *os : outputSegments)
-    if (os->name == name)
-      // TODO: assert that os->perms == perms, once we figure out what to do
-      // about default-created segments.
-      return os;
-
-  auto *os = make<OutputSegment>();
-  os->name = name;
-  os->perms = perms;
-  outputSegments.push_back(os);
-  return os;
+OutputSegment *macho::getOutputSegment(StringRef name) {
+  return nameToOutputSegment.lookup(name);
+}
+
+OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
+  OutputSegment *&segRef = nameToOutputSegment[name];
+  if (segRef != nullptr)
+    return segRef;
+
+  segRef = make<OutputSegment>();
+  segRef->name = name;
+  segRef->maxProt = maxProt(name);
+  segRef->initProt = initProt(name);
+
+  outputSegments.push_back(segRef);
+  return segRef;
 }

diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index 211750de8761..ec8437529b41 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -15,6 +15,14 @@
 namespace lld {
 namespace macho {
 
+namespace segment_names {
+
+constexpr const char *text = "__TEXT";
+constexpr const char *pageZero = "__PAGEZERO";
+constexpr const char *linkEdit = "__LINKEDIT";
+
+} // namespace segment_names
+
 class InputSection;
 
 class OutputSegment {
@@ -23,15 +31,32 @@ class OutputSegment {
 
   InputSection *lastSection() const { return sections.back().second.back(); }
 
+  bool isNeeded() const {
+    return !sections.empty() || name == segment_names::linkEdit;
+  }
+
+  void addSection(InputSection *);
+
+  const llvm::MapVector<StringRef, std::vector<InputSection *>> &
+  getSections() const {
+    return sections;
+  }
+
+  uint64_t fileOff = 0;
   StringRef name;
-  uint32_t perms;
+  uint32_t numNonHiddenSections = 0;
+  uint32_t maxProt = 0;
+  uint32_t initProt = 0;
   uint8_t index;
+
+private:
   llvm::MapVector<StringRef, std::vector<InputSection *>> sections;
 };
 
 extern std::vector<OutputSegment *> outputSegments;
 
-OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms);
+OutputSegment *getOutputSegment(StringRef name);
+OutputSegment *getOrCreateOutputSegment(StringRef name);
 
 } // namespace macho
 } // namespace lld

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 3212aea4cfc4..d4af5a706a80 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -7,13 +7,60 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyntheticSections.h"
+#include "InputFiles.h"
+#include "OutputSegment.h"
 #include "Symbols.h"
+#include "Writer.h"
 
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
 using namespace llvm::MachO;
+using namespace llvm::support;
 
 namespace lld {
 namespace macho {
 
+MachHeaderSection::MachHeaderSection() {
+  // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
+  // from the beginning of the file (i.e. the header).
+  segname = segment_names::text;
+  name = section_names::header;
+}
+
+void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
+  loadCommands.push_back(lc);
+  sizeOfCmds += lc->getSize();
+}
+
+size_t MachHeaderSection::getSize() const {
+  return sizeof(mach_header_64) + sizeOfCmds;
+}
+
+void MachHeaderSection::writeTo(uint8_t *buf) {
+  auto *hdr = reinterpret_cast<mach_header_64 *>(buf);
+  hdr->magic = MH_MAGIC_64;
+  hdr->cputype = CPU_TYPE_X86_64;
+  hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
+  hdr->filetype = MH_EXECUTE;
+  hdr->ncmds = loadCommands.size();
+  hdr->sizeofcmds = sizeOfCmds;
+  hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
+
+  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
+  for (LoadCommand *lc : loadCommands) {
+    lc->writeTo(p);
+    p += lc->getSize();
+  }
+}
+
+PageZeroSection::PageZeroSection() {
+  segname = segment_names::pageZero;
+  name = section_names::pageZero;
+}
+
 GotSection::GotSection() {
   segname = "__DATA_CONST";
   name = "__got";
@@ -30,6 +77,57 @@ void GotSection::addEntry(DylibSymbol &sym) {
   }
 }
 
+BindingSection::BindingSection() {
+  segname = segment_names::linkEdit;
+  name = section_names::binding;
+}
+
+bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
+
+// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
+// interprets to update a record with the following fields:
+//  * segment index (of the segment to write the symbol addresses to, typically
+//    the __DATA_CONST segment which contains the GOT)
+//  * offset within the segment, indicating the next location to write a binding
+//  * symbol type
+//  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
+//  * symbol name
+//  * addend
+// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
+// a symbol in the GOT, and increments the segment offset to point to the next
+// entry. It does *not* clear the record state after doing the bind, so
+// subsequent opcodes only need to encode the differences between bindings.
+void BindingSection::finalizeContents() {
+  if (!isNeeded())
+    return;
+
+  raw_svector_ostream os{contents};
+  os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                             in.got->parent->index);
+  encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os);
+  for (const DylibSymbol *sym : in.got->getEntries()) {
+    // TODO: Implement compact encoding -- we only need to encode the
+    // differences between consecutive symbol entries.
+    if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 sym->file->ordinal);
+    } else {
+      error("TODO: Support larger dylib symbol ordinals");
+      continue;
+    }
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+       << sym->getName() << '\0'
+       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+       << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  }
+
+  os << static_cast<uint8_t>(BIND_OPCODE_DONE);
+}
+
+void BindingSection::writeTo(uint8_t *buf) {
+  memcpy(buf, contents.data(), contents.size());
+}
+
 InStruct in;
 
 } // namespace macho

diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index ba5a8f35b28b..2adc5754a7e8 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -13,10 +13,45 @@
 #include "Target.h"
 #include "llvm/ADT/SetVector.h"
 
+using namespace llvm::MachO;
+
 namespace lld {
 namespace macho {
 
+namespace section_names {
+
+constexpr const char *pageZero = "__pagezero";
+constexpr const char *header = "__mach_header";
+constexpr const char *binding = "__binding";
+
+} // namespace section_names
+
 class DylibSymbol;
+class LoadCommand;
+
+// The header of the Mach-O file, which must have a file offset of zero.
+class MachHeaderSection : public InputSection {
+public:
+  MachHeaderSection();
+  void addLoadCommand(LoadCommand *);
+  bool isHidden() const override { return true; }
+  size_t getSize() const override;
+  void writeTo(uint8_t *buf) override;
+
+private:
+  std::vector<LoadCommand *> loadCommands;
+  uint32_t sizeOfCmds = 0;
+};
+
+// A hidden section that exists solely for the purpose of creating the
+// __PAGEZERO segment, which is used to catch null pointer dereferences.
+class PageZeroSection : public InputSection {
+public:
+  PageZeroSection();
+  bool isHidden() const override { return true; }
+  size_t getSize() const override { return ImageBase; }
+  uint64_t getFileSize() const override { return 0; }
+};
 
 // This section will be populated by dyld with addresses to non-lazily-loaded
 // dylib symbols.
@@ -31,6 +66,8 @@ class GotSection : public InputSection {
 
   size_t getSize() const override { return entries.size() * WordSize; }
 
+  bool isNeeded() const override { return !entries.empty(); }
+
   void writeTo(uint8_t *buf) override {
     // Nothing to write, GOT contains all zeros at link time; it's populated at
     // runtime by dyld.
@@ -40,8 +77,24 @@ class GotSection : public InputSection {
   llvm::SetVector<const DylibSymbol *> entries;
 };
 
+// Stores bind opcodes for telling dyld which symbols to load non-lazily.
+class BindingSection : public InputSection {
+public:
+  BindingSection();
+  void finalizeContents();
+  size_t getSize() const override { return contents.size(); }
+  // Like other sections in __LINKEDIT, the binding section is special: its
+  // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
+  // section headers.
+  bool isHidden() const override { return true; }
+  bool isNeeded() const override;
+  void writeTo(uint8_t *buf) override;
+
+  SmallVector<char, 128> contents;
+};
+
 struct InStruct {
-  GotSection *got;
+  GotSection *got = nullptr;
 };
 
 extern InStruct in;

diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 7c38487c063c..b6e5ed0c5e4d 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -19,13 +19,10 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
-using namespace llvm::support;
 using namespace lld;
 using namespace lld::macho;
 
@@ -34,90 +31,49 @@ class LCLinkEdit;
 class LCDyldInfo;
 class LCSymtab;
 
-class LoadCommand {
-public:
-  virtual ~LoadCommand() = default;
-  virtual uint32_t getSize() const = 0;
-  virtual void writeTo(uint8_t *buf) const = 0;
-};
-
 class Writer {
 public:
   Writer() : buffer(errorHandler().outputBuffer) {}
 
-  void createLoadCommands();
   void scanRelocations();
-  void assignAddresses();
-
-  void createDyldInfoContents();
+  void createHiddenSections();
+  void sortSections();
+  void createLoadCommands();
+  void assignAddresses(OutputSegment *);
+  void createSymtabContents();
 
   void openFile();
-  void writeHeader();
   void writeSections();
 
   void run();
 
-  std::vector<LoadCommand *> loadCommands;
   std::unique_ptr<FileOutputBuffer> &buffer;
-  uint64_t fileSize = 0;
-  uint64_t sizeofCmds = 0;
-  LCLinkEdit *linkEditSeg = nullptr;
-  LCDyldInfo *dyldInfoSeg = nullptr;
-  LCSymtab *symtabSeg = nullptr;
-};
-
-class LCPagezero : public LoadCommand {
-public:
-  uint32_t getSize() const override { return sizeof(segment_command_64); }
-
-  void writeTo(uint8_t *buf) const override {
-    auto *c = reinterpret_cast<segment_command_64 *>(buf);
-    c->cmd = LC_SEGMENT_64;
-    c->cmdsize = getSize();
-    strcpy(c->segname, "__PAGEZERO");
-    c->vmsize = PageSize;
-  }
-};
-
-class LCLinkEdit : public LoadCommand {
-public:
-  uint32_t getSize() const override { return sizeof(segment_command_64); }
-
-  void writeTo(uint8_t *buf) const override {
-    auto *c = reinterpret_cast<segment_command_64 *>(buf);
-    c->cmd = LC_SEGMENT_64;
-    c->cmdsize = getSize();
-    strcpy(c->segname, "__LINKEDIT");
-    c->vmaddr = addr;
-    c->fileoff = fileOff;
-    c->filesize = c->vmsize = contents.size();
-    c->maxprot = VM_PROT_READ | VM_PROT_WRITE;
-    c->initprot = VM_PROT_READ;
-  }
-
-  uint64_t getOffset() const { return fileOff + contents.size(); }
-
-  uint64_t fileOff = 0;
   uint64_t addr = 0;
-  SmallVector<char, 128> contents;
+  uint64_t fileOff = 0;
+  MachHeaderSection *headerSection = nullptr;
+  BindingSection *bindingSection = nullptr;
 };
 
+// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
 class LCDyldInfo : public LoadCommand {
 public:
+  LCDyldInfo(BindingSection *bindingSection) : bindingSection(bindingSection) {}
+
   uint32_t getSize() const override { return sizeof(dyld_info_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<dyld_info_command *>(buf);
     c->cmd = LC_DYLD_INFO_ONLY;
     c->cmdsize = getSize();
-    c->bind_off = bindOff;
-    c->bind_size = bindSize;
+    if (bindingSection->isNeeded()) {
+      c->bind_off = bindingSection->getFileOffset();
+      c->bind_size = bindingSection->getFileSize();
+    }
     c->export_off = exportOff;
     c->export_size = exportSize;
   }
 
-  uint64_t bindOff = 0;
-  uint64_t bindSize = 0;
+  BindingSection *bindingSection;
   uint64_t exportOff = 0;
   uint64_t exportSize = 0;
 };
@@ -139,7 +95,7 @@ class LCSegment : public LoadCommand {
 
   uint32_t getSize() const override {
     return sizeof(segment_command_64) +
-           seg->sections.size() * sizeof(section_64);
+           seg->numNonHiddenSections * sizeof(section_64);
   }
 
   void writeTo(uint8_t *buf) const override {
@@ -149,22 +105,25 @@ class LCSegment : public LoadCommand {
     c->cmd = LC_SEGMENT_64;
     c->cmdsize = getSize();
     memcpy(c->segname, name.data(), name.size());
+    c->fileoff = seg->fileOff;
+    c->maxprot = seg->maxProt;
+    c->initprot = seg->initProt;
+
+    if (seg->getSections().empty())
+      return;
 
-    // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
-    // from the beginning of the file (i.e. the header).
-    // TODO: replace this logic by creating a synthetic __TEXT,__mach_header
-    // section instead.
-    c->fileoff = name == "__TEXT" ? 0 : seg->firstSection()->addr - ImageBase;
-    c->vmaddr = c->fileoff + ImageBase;
-    c->vmsize = c->filesize =
+    c->vmaddr = seg->firstSection()->addr;
+    c->vmsize =
         seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr;
-    c->maxprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
-    c->initprot = seg->perms;
-    c->nsects = seg->sections.size();
+    c->nsects = seg->numNonHiddenSections;
 
-    for (auto &p : seg->sections) {
+    for (auto &p : seg->getSections()) {
       StringRef s = p.first;
-      std::vector<InputSection *> &sections = p.second;
+      ArrayRef<InputSection *> sections = p.second;
+      for (InputSection *isec : sections)
+        c->filesize += isec->getFileSize();
+      if (sections[0]->isHidden())
+        continue;
 
       auto *sectHdr = reinterpret_cast<section_64 *>(buf);
       buf += sizeof(section_64);
@@ -173,7 +132,7 @@ class LCSegment : public LoadCommand {
       memcpy(sectHdr->segname, name.data(), name.size());
 
       sectHdr->addr = sections[0]->addr;
-      sectHdr->offset = sections[0]->addr - ImageBase;
+      sectHdr->offset = sections[0]->getFileOffset();
       sectHdr->align = sections[0]->align;
       uint32_t maxAlign = 0;
       for (const InputSection *section : sections)
@@ -260,40 +219,68 @@ class LCLoadDylinker : public LoadCommand {
   // different location.
   const StringRef path = "/usr/lib/dyld";
 };
-} // namespace
 
-void Writer::createLoadCommands() {
-  linkEditSeg = make<LCLinkEdit>();
-  dyldInfoSeg = make<LCDyldInfo>();
-  symtabSeg = make<LCSymtab>();
-
-  loadCommands.push_back(linkEditSeg);
-  loadCommands.push_back(dyldInfoSeg);
-  loadCommands.push_back(symtabSeg);
-  loadCommands.push_back(make<LCPagezero>());
-  loadCommands.push_back(make<LCLoadDylinker>());
-  loadCommands.push_back(make<LCDysymtab>());
-  loadCommands.push_back(make<LCMain>());
-
-  uint8_t segIndex = 1; // LCPagezero is a segment load command
-  for (OutputSegment *seg : outputSegments) {
-    if (!seg->sections.empty()) {
-      loadCommands.push_back(make<LCSegment>(seg->name, seg));
-      seg->index = segIndex++;
+class SectionComparator {
+public:
+  struct OrderInfo {
+    uint32_t segmentOrder;
+    DenseMap<StringRef, uint32_t> sectionOrdering;
+  };
+
+  SectionComparator() {
+    // This defines the order of segments and the sections within each segment.
+    // Segments that are not mentioned here will end up at defaultPosition;
+    // sections that are not mentioned will end up at the end of the section
+    // list for their given segment.
+    std::vector<std::pair<StringRef, std::vector<StringRef>>> ordering{
+        {segment_names::pageZero, {}},
+        {segment_names::text, {section_names::header}},
+        {defaultPosition, {}},
+        // Make sure __LINKEDIT is the last segment (i.e. all its hidden
+        // sections must be ordered after other sections).
+        {segment_names::linkEdit, {section_names::binding}},
+    };
+
+    for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
+      auto &p = ordering[i];
+      StringRef segname = p.first;
+      const std::vector<StringRef> &sectOrdering = p.second;
+      OrderInfo &info = orderMap[segname];
+      info.segmentOrder = i;
+      for (uint32_t j = 0, m = sectOrdering.size(); j < m; ++j)
+        info.sectionOrdering[sectOrdering[j]] = j;
     }
   }
 
-  uint64_t dylibOrdinal = 1;
-  for (InputFile *file : inputFiles) {
-    if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
-      loadCommands.push_back(make<LCLoadDylib>(dylibFile->dylibName));
-      dylibFile->ordinal = dylibOrdinal++;
-    }
+  // Return a {segmentOrder, sectionOrder} pair. Using this as a key will
+  // ensure that all sections in the same segment are sorted contiguously.
+  std::pair<uint32_t, uint32_t> order(const InputSection *isec) {
+    auto it = orderMap.find(isec->segname);
+    if (it == orderMap.end())
+      return {orderMap[defaultPosition].segmentOrder, 0};
+    OrderInfo &info = it->second;
+    auto sectIt = info.sectionOrdering.find(isec->name);
+    if (sectIt != info.sectionOrdering.end())
+      return {info.segmentOrder, sectIt->second};
+    return {info.segmentOrder, info.sectionOrdering.size()};
   }
 
-  // TODO: dyld requires libSystem to be loaded. libSystem is a universal
-  // binary and we don't have support for that yet, so mock it out here.
-  loadCommands.push_back(make<LCLoadDylib>("/usr/lib/libSystem.B.dylib"));
+  bool operator()(const InputSection *a, const InputSection *b) {
+    return order(a) < order(b);
+  }
+
+private:
+  const StringRef defaultPosition = StringRef();
+  DenseMap<StringRef, OrderInfo> orderMap;
+};
+
+} // namespace
+
+template <typename SectionType, typename... ArgT>
+SectionType *createInputSection(ArgT &&... args) {
+  auto *section = make<SectionType>(std::forward<ArgT>(args)...);
+  inputSections.push_back(section);
+  return section;
 }
 
 void Writer::scanRelocations() {
@@ -304,76 +291,76 @@ void Writer::scanRelocations() {
           in.got->addEntry(*dylibSymbol);
 }
 
-void Writer::assignAddresses() {
-  uint64_t addr = ImageBase + sizeof(mach_header_64);
-
-  uint64_t size = 0;
-  for (LoadCommand *lc : loadCommands)
-    size += lc->getSize();
-  sizeofCmds = size;
-  addr += size;
+void Writer::createLoadCommands() {
+  headerSection->addLoadCommand(make<LCDyldInfo>(bindingSection));
+  headerSection->addLoadCommand(make<LCLoadDylinker>());
+  headerSection->addLoadCommand(make<LCSymtab>());
+  headerSection->addLoadCommand(make<LCDysymtab>());
+  headerSection->addLoadCommand(make<LCMain>());
 
+  uint8_t segIndex = 0;
   for (OutputSegment *seg : outputSegments) {
-    addr = alignTo(addr, PageSize);
+    if (seg->isNeeded()) {
+      headerSection->addLoadCommand(make<LCSegment>(seg->name, seg));
+      seg->index = segIndex++;
+    }
+  }
 
-    for (auto &p : seg->sections) {
-      ArrayRef<InputSection *> sections = p.second;
-      for (InputSection *isec : sections) {
-        addr = alignTo(addr, isec->align);
-        isec->addr = addr;
-        addr += isec->getSize();
-      }
+  uint64_t dylibOrdinal = 1;
+  for (InputFile *file : inputFiles) {
+    if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
+      headerSection->addLoadCommand(make<LCLoadDylib>(dylibFile->dylibName));
+      dylibFile->ordinal = dylibOrdinal++;
     }
   }
 
-  addr = alignTo(addr, PageSize);
-  linkEditSeg->addr = addr;
-  linkEditSeg->fileOff = addr - ImageBase;
+  // TODO: dyld requires libSystem to be loaded. libSystem is a universal
+  // binary and we don't have support for that yet, so mock it out here.
+  headerSection->addLoadCommand(
+      make<LCLoadDylib>("/usr/lib/libSystem.B.dylib"));
 }
 
-// LC_DYLD_INFO_ONLY contains symbol import/export information. Imported
-// symbols are described by a sequence of bind opcodes, which allow for a
-// compact encoding. Exported symbols are described using a trie.
-void Writer::createDyldInfoContents() {
-  uint64_t sectionStart = linkEditSeg->getOffset();
-  raw_svector_ostream os{linkEditSeg->contents};
-
-  if (in.got->getSize() != 0) {
-    // Emit bind opcodes, which tell dyld which dylib symbols to load.
-
-    // Tell dyld to write to the section containing the GOT.
-    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                               in.got->parent->index);
-    encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os);
-    for (const DylibSymbol *sym : in.got->getEntries()) {
-      // TODO: Implement compact encoding -- we only need to encode the
-      // differences between consecutive symbol entries.
-      if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
-        os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                   sym->file->ordinal);
-      } else {
-        error("TODO: Support larger dylib symbol ordinals");
-        continue;
-      }
-      os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-         << sym->getName() << '\0'
-         << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
-         << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
-    }
-
-    os << static_cast<uint8_t>(BIND_OPCODE_DONE);
+void Writer::createHiddenSections() {
+  headerSection = createInputSection<MachHeaderSection>();
+  bindingSection = createInputSection<BindingSection>();
+  createInputSection<PageZeroSection>();
+}
 
-    dyldInfoSeg->bindOff = sectionStart;
-    dyldInfoSeg->bindSize = linkEditSeg->getOffset() - sectionStart;
+void Writer::sortSections() {
+  llvm::stable_sort(inputSections, SectionComparator());
+
+  // TODO This is wrong; input sections ought to be grouped into
+  // output sections, which are then organized like this.
+  uint32_t sectionIndex = 0;
+  // Add input sections to output segments.
+  for (InputSection *isec : inputSections) {
+    if (isec->isNeeded()) {
+      if (!isec->isHidden())
+        isec->sectionIndex = ++sectionIndex;
+      getOrCreateOutputSegment(isec->segname)->addSection(isec);
+    }
   }
+}
 
-  // TODO: emit bind opcodes for lazy symbols.
-  // TODO: Implement symbol export trie.
+void Writer::assignAddresses(OutputSegment *seg) {
+  addr = alignTo(addr, PageSize);
+  fileOff = alignTo(fileOff, PageSize);
+  seg->fileOff = fileOff;
+
+  for (auto &p : seg->getSections()) {
+    ArrayRef<InputSection *> sections = p.second;
+    for (InputSection *isec : sections) {
+      addr = alignTo(addr, isec->align);
+      isec->addr = addr;
+      addr += isec->getSize();
+      fileOff += isec->getFileSize();
+    }
+  }
 }
 
 void Writer::openFile() {
   Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
-      FileOutputBuffer::create(config->outputFile, fileSize,
+      FileOutputBuffer::create(config->outputFile, fileOff,
                                FileOutputBuffer::F_executable);
 
   if (!bufferOrErr)
@@ -383,49 +370,51 @@ void Writer::openFile() {
     buffer = std::move(*bufferOrErr);
 }
 
-void Writer::writeHeader() {
-  auto *hdr = reinterpret_cast<mach_header_64 *>(buffer->getBufferStart());
-  hdr->magic = MH_MAGIC_64;
-  hdr->cputype = CPU_TYPE_X86_64;
-  hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
-  hdr->filetype = MH_EXECUTE;
-  hdr->ncmds = loadCommands.size();
-  hdr->sizeofcmds = sizeofCmds;
-  hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
-
-  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
-  for (LoadCommand *lc : loadCommands) {
-    lc->writeTo(p);
-    p += lc->getSize();
-  }
-}
-
 void Writer::writeSections() {
   uint8_t *buf = buffer->getBufferStart();
-
-  for (OutputSegment *seg : outputSegments)
-    for (auto &sect : seg->sections)
-      for (InputSection *isec : sect.second)
-        isec->writeTo(buf + isec->addr - ImageBase);
-
-  memcpy(buf + linkEditSeg->fileOff, linkEditSeg->contents.data(),
-         linkEditSeg->contents.size());
+  for (OutputSegment *seg : outputSegments) {
+    uint64_t fileOff = seg->fileOff;
+    for (auto &sect : seg->getSections()) {
+      for (InputSection *isec : sect.second) {
+        isec->writeTo(buf + fileOff);
+        fileOff += isec->getFileSize();
+      }
+    }
+  }
 }
 
 void Writer::run() {
-  createLoadCommands();
   scanRelocations();
-  assignAddresses();
+  createHiddenSections();
+  // Sort and assign sections to their respective segments. No more sections can
+  // be created after this method runs.
+  sortSections();
+  // dyld requires __LINKEDIT segment to always exist (even if empty).
+  getOrCreateOutputSegment(segment_names::linkEdit);
+  // No more segments can be created after this method runs.
+  createLoadCommands();
+
+  // Ensure that segments (and the sections they contain) are allocated
+  // addresses in ascending order, which dyld requires.
+  //
+  // Note that at this point, __LINKEDIT sections are empty, but we need to
+  // determine addresses of other segments/sections before generating its
+  // contents.
+  for (OutputSegment *seg : outputSegments)
+    assignAddresses(seg);
+
+  // Fill __LINKEDIT contents.
+  bindingSection->finalizeContents();
 
-  // Fill __LINKEDIT contents
-  createDyldInfoContents();
-  fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size();
+  // Now that __LINKEDIT is filled out, do a proper calculation of its
+  // addresses and offsets. We don't have to recalculate the other segments
+  // since sortSections() ensures that __LINKEDIT is the last segment.
+  assignAddresses(getOutputSegment(segment_names::linkEdit));
 
   openFile();
   if (errorCount())
     return;
 
-  writeHeader();
   writeSections();
 
   if (auto e = buffer->commit())
@@ -435,6 +424,5 @@ void Writer::run() {
 void macho::writeResult() { Writer().run(); }
 
 void macho::createSyntheticSections() {
-  in.got = make<GotSection>();
-  inputSections.push_back(in.got);
+  in.got = createInputSection<GotSection>();
 }

diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h
index accdedca142b..7f846233107a 100644
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@@ -9,9 +9,18 @@
 #ifndef LLD_MACHO_WRITER_H
 #define LLD_MACHO_WRITER_H
 
+#include <cstdint>
+
 namespace lld {
 namespace macho {
 
+class LoadCommand {
+public:
+  virtual ~LoadCommand() = default;
+  virtual uint32_t getSize() const = 0;
+  virtual void writeTo(uint8_t *buf) const = 0;
+};
+
 void writeResult();
 
 void createSyntheticSections();

diff --git a/lld/test/MachO/segments.s b/lld/test/MachO/segments.s
index 69e3d9f030b0..15e6c7968f85 100644
--- a/lld/test/MachO/segments.s
+++ b/lld/test/MachO/segments.s
@@ -3,13 +3,46 @@
 # RUN: lld -flavor darwinnew -o %t %t.o
 # RUN: llvm-readobj --macho-segment %t | FileCheck %s
 
-# These segments must always be present.
-# CHECK-DAG: Name: __PAGEZERO
-# CHECK-DAG: Name: __LINKEDIT
-# CHECK-DAG: Name: __TEXT
+## These two segments must always be present at the start of an executable.
+# CHECK-NOT:  Segment {
+# CHECK:      Segment {
+# CHECK:        Cmd: LC_SEGMENT_64
+# CHECK:        Name: __PAGEZERO
+# CHECK:        Size: 72
+# CHECK:        vmaddr:
+# CHECK:        vmsize:
+# CHECK:        fileoff: 0
+# CHECK:        filesize: 0
+## The kernel won't execute a binary with the wrong protections for __PAGEZERO.
+# CHECK:        maxprot: ---
+# CHECK:        initprot: ---
+# CHECK:        nsects: 0
+# CHECK:        flags: 0x0
+# CHECK:      }
+# CHECK:      Segment {
+# CHECK:        Cmd: LC_SEGMENT_64
+# CHECK:        Name: __TEXT
+# CHECK:        Size: 152
+# CHECK:        vmaddr:
+# CHECK:        vmsize:
+## dyld3 assumes that the __TEXT segment starts from the file header
+# CHECK:        fileoff: 0
+# CHECK:        filesize:
+# CHECK:        maxprot: rwx
+# CHECK:        initprot: r-x
+# CHECK:        nsects: 1
+# CHECK:        flags: 0x0
+# CHECK:      }
 
-# Check that we handle max-length names correctly.
-# CHECK-DAG: Name: maxlen_16ch_name
+## Check that we handle max-length names correctly.
+# CHECK:      Cmd: LC_SEGMENT_64
+# CHECK-NEXT: Name: maxlen_16ch_name
+
+## This segment must always be present at the end of an executable.
+# CHECK:      Name: __LINKEDIT
+# CHECK:      maxprot: rwx
+# CHECK:      initprot: r--
+# CHECK-NOT:  Cmd: LC_SEGMENT_64
 
 .text
 .global _main

diff --git a/lld/test/MachO/text-segment.s b/lld/test/MachO/text-segment.s
deleted file mode 100644
index a3c7edbc61bf..000000000000
--- a/lld/test/MachO/text-segment.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-# RUN: lld -flavor darwinnew -o %t %t.o
-# RUN: llvm-readobj --macho-segment %t | FileCheck %s
-
-# CHECK: Name: __TEXT
-# CHECK-NOT: }
-# dyld3 assumes that the __TEXT segment starts from the file header
-# CHECK:       fileoff: 0
-
-.text
-.global _main
-_main:
-  mov $0, %rax
-  ret


        


More information about the llvm-commits mailing list