[lld] [lld-macho] Implement support for ObjC relative method lists (PR #86231)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 09:07:28 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: None (alx32)

<details>
<summary>Changes</summary>

The MachO format supports relative offsets for ObjC method lists. This support is present already in ld64. With this change we implement this support in lld also.

Relative method lists can be identified by a specific flag (0x80000000) in the method list header. When this flag is present, the method list will contain 32-bit relative offsets to the current Program Counter (PC), instead of absolute pointers.
Additionally, when relative method lists are used, the offset to the selector name will now be relative and point to the selector reference (selref) instead of the name itself.

---

Patch is 27.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86231.diff


11 Files Affected:

- (modified) lld/MachO/Config.h (+1) 
- (modified) lld/MachO/Driver.cpp (+17) 
- (modified) lld/MachO/InputSection.cpp (+8) 
- (modified) lld/MachO/InputSection.h (+1) 
- (modified) lld/MachO/MapFile.cpp (+15-7) 
- (modified) lld/MachO/ObjC.h (+2) 
- (modified) lld/MachO/Options.td (+6) 
- (modified) lld/MachO/SyntheticSections.cpp (+229) 
- (modified) lld/MachO/SyntheticSections.h (+48) 
- (modified) lld/MachO/Writer.cpp (+5) 
- (added) lld/test/MachO/objc-relative-method-lists-simple.s (+211) 


``````````diff
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index f820513a111ea3..7b45f7f4c39a1b 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -135,6 +135,7 @@ struct Configuration {
   bool emitEncryptionInfo = false;
   bool emitInitOffsets = false;
   bool emitChainedFixups = false;
+  bool emitRelativeMethodLists = false;
   bool thinLTOEmitImportsFiles;
   bool thinLTOEmitIndexFiles;
   bool thinLTOIndexOnly;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 919a14b8bcf08b..495361d089f6e0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1086,6 +1086,22 @@ static bool shouldEmitChainedFixups(const InputArgList &args) {
   return isRequested;
 }
 
+static bool shouldEmitRelativeMethodLists(const InputArgList &args) {
+  const Arg *arg = args.getLastArg(OPT_objc_relative_method_lists,
+                                   OPT_no_objc_relative_method_lists);
+  if (arg && arg->getOption().getID() == OPT_objc_relative_method_lists)
+    return true;
+  if (arg && arg->getOption().getID() == OPT_no_objc_relative_method_lists)
+    return false;
+
+  // TODO: If no flag is specified, don't default to false, but instead:
+  //   - default false on   <   ios14
+  //   - default true  on   >=  ios14
+  // For now, until this feature is confirmed stable, default to false if no
+  // flag is explicitly specified
+  return false;
+}
+
 void SymbolPatterns::clear() {
   literals.clear();
   globs.clear();
@@ -1629,6 +1645,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->emitChainedFixups = shouldEmitChainedFixups(args);
   config->emitInitOffsets =
       config->emitChainedFixups || args.hasArg(OPT_init_offsets);
+  config->emitRelativeMethodLists = shouldEmitRelativeMethodLists(args);
   config->icfLevel = getICFLevel(args);
   config->dedupStrings =
       args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 22930d52dd1db2..e3d7a400e4ce92 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -46,6 +46,14 @@ void lld::macho::addInputSection(InputSection *inputSection) {
   if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
     if (isec->isCoalescedWeak())
       return;
+    if (config->emitRelativeMethodLists &&
+        ObjCMethListSection::isMethodList(isec)) {
+      if (in.objcMethList->inputOrder == UnspecifiedInputOrder)
+        in.objcMethList->inputOrder = inputSectionsOrder++;
+      in.objcMethList->addInput(isec);
+      isec->parent = in.objcMethList;
+      return;
+    }
     if (config->emitInitOffsets &&
         sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
       in.initOffsets->addInput(isec);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 694bdf734907ba..a0e6afef92cb82 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -342,6 +342,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func";
 constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
 constexpr const char objcCatList[] = "__objc_catlist";
 constexpr const char objcClassList[] = "__objc_classlist";
+constexpr const char objcMethList[] = "__objc_methlist";
 constexpr const char objcClassRefs[] = "__objc_classrefs";
 constexpr const char objcConst[] = "__objc_const";
 constexpr const char objCImageInfo[] = "__objc_imageinfo";
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f736360624ebd1..2a31a5c09cdd22 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -197,18 +197,24 @@ void macho::writeMapFile() {
                    seg->name.str().c_str(), osec->name.str().c_str());
     }
 
+  // Shared function to print an array of symbols.
+  auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
+    for (const ConcatInputSection *isec : arr) {
+      for (Defined *sym : isec->symbols) {
+        if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
+          os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
+                       sym->size, readerToFileOrdinal[sym->getFile()],
+                       sym->getName().str().data());
+      }
+    }
+  };
+
   os << "# Symbols:\n";
   os << "# Address\tSize    \tFile  Name\n";
   for (const OutputSegment *seg : outputSegments) {
     for (const OutputSection *osec : seg->getSections()) {
       if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
-        for (const InputSection *isec : concatOsec->inputs) {
-          for (Defined *sym : isec->symbols)
-            if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
-              os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
-                           sym->size, readerToFileOrdinal[sym->getFile()],
-                           sym->getName().str().data());
-        }
+        printIsecArrSyms(concatOsec->inputs);
       } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
         uint64_t lastAddr = 0; // strings will never start at address 0, so this
@@ -237,6 +243,8 @@ void macho::writeMapFile() {
         printNonLazyPointerSection(os, in.got);
       } else if (osec == in.tlvPointers) {
         printNonLazyPointerSection(os, in.tlvPointers);
+      } else if (osec == in.objcMethList) {
+        printIsecArrSyms(in.objcMethList->getInputs());
       }
       // TODO print other synthetic sections
     }
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 9fbe984e6223ec..8081605670c519 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -22,6 +22,8 @@ constexpr const char klassPropList[] = "__OBJC_$_CLASS_PROP_LIST_";
 constexpr const char metaclass[] = "_OBJC_METACLASS_$_";
 constexpr const char ehtype[] = "_OBJC_EHTYPE_$_";
 constexpr const char ivar[] = "_OBJC_IVAR_$_";
+constexpr const char instanceMethods[] = "__OBJC_$_INSTANCE_METHODS_";
+constexpr const char classMethods[] = "__OBJC_$_CLASS_METHODS_";
 constexpr const char listProprieties[] = "__OBJC_$_PROP_LIST_";
 
 constexpr const char category[] = "__OBJC_$_CATEGORY_";
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 0d8ee2a0926be2..19f8509ba714bd 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1284,6 +1284,12 @@ def fixup_chains_section : Flag<["-"], "fixup_chains_section">,
     HelpText<"This option is undocumented in ld64">,
     Flags<[HelpHidden]>,
     Group<grp_undocumented>;
+def objc_relative_method_lists : Flag<["-"], "objc_relative_method_lists">,
+    HelpText<"Emit relative method lists (more compact representation)">,
+    Group<grp_undocumented>;
+def no_objc_relative_method_lists : Flag<["-"], "no_objc_relative_method_lists">,
+    HelpText<"Don't emit relative method lists (use traditional representation)">,
+    Group<grp_undocumented>;
 def flto_codegen_only : Flag<["-"], "flto-codegen-only">,
     HelpText<"This option is undocumented in ld64">,
     Flags<[HelpHidden]>,
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 1b3694528de1dd..76836d23ce2687 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -12,6 +12,7 @@
 #include "ExportTrie.h"
 #include "InputFiles.h"
 #include "MachOStructs.h"
+#include "ObjC.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -1974,6 +1975,234 @@ void InitOffsetsSection::setUp() {
   }
 }
 
+ObjCMethListSection::ObjCMethListSection()
+    : SyntheticSection(segment_names::text, section_names::objcMethList) {
+  flags = S_ATTR_NO_DEAD_STRIP;
+  align = m_align;
+}
+
+// Go through all input method lists and ensure that we have selrefs for all
+// their method names. The selrefs will be needed later by ::writeTo. We need to
+// create them early on here to ensure they are processed correctly by the lld
+// pipeline.
+void ObjCMethListSection::setUp() {
+  for (const ConcatInputSection *isec : inputs) {
+    uint32_t structSizeAndFlags = 0, structCount = 0;
+    readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
+    uint32_t structSize = structSizeAndFlags & m_structSizeMask;
+
+    // Method name is immediately after header
+    uint32_t methodNameOff = m_methodListHeaderSize;
+
+    // Loop through all methods, and ensure a selref for each of them exists.
+    while (methodNameOff < isec->data.size()) {
+      const Reloc *reloc = isec->getRelocAt(methodNameOff);
+      assert(reloc && "Relocation expected at method list name slot");
+      auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+      assert(def && "Expected valid Defined at method list name slot");
+      auto *cisec = cast<CStringInputSection>(def->isec);
+      assert(cisec && "Expected method name to be in a CStringInputSection");
+      auto methname = cisec->getStringRefAtOffset(def->value);
+      if (!in.objcSelRefs->getSelRef(methname))
+        in.objcSelRefs->makeSelRef(methname);
+
+      // Jump to method name offset in next struct
+      methodNameOff += structSize;
+    }
+  }
+}
+
+// Calculate section size and final offsets for where InputSection's need to be
+// written.
+void ObjCMethListSection::finalize() {
+  // m_size will be the total size of the __objc_methlist section
+  m_size = 0;
+  for (ConcatInputSection *isec : inputs) {
+    // We can also use m_size as write offset for isec
+    assert(m_size == alignToPowerOf2(m_size, m_align) &&
+           "expected __objc_methlist to be aligned by default with the "
+           "required section alignment");
+    isec->outSecOff = m_size;
+
+    isec->isFinal = true;
+    uint32_t relativeListSize =
+        methodListSizeToRelativeMethodListSize(isec->data.size());
+    m_size += relativeListSize;
+
+    // If encoding the method list in relative offset format shrinks the size,
+    // then we also need to adjust symbol sizes to match the new size. Note that
+    // on 32bit platforms the size of the method list will remain the same when
+    // encoded in relative offset format.
+    if (relativeListSize != isec->data.size()) {
+      for (Symbol *sym : isec->symbols) {
+        assert(isa<Defined>(sym) &&
+               "Unexpected undefined symbol in ObjC method list");
+        auto *def = cast<Defined>(sym);
+        // There can be 0-size symbols, check if this is the case and ignore
+        // them.
+        if (def->size) {
+          assert(
+              def->size == isec->data.size() &&
+              "Invalid ObjC method list symbol size: expected symbol size to "
+              "match isec size");
+          def->size = relativeListSize;
+        }
+      }
+    }
+  }
+}
+
+void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
+  uint8_t *buf = bufStart;
+  for (const ConcatInputSection *isec : inputs) {
+    assert(buf - bufStart == long(isec->outSecOff) &&
+           "Writing at unexpected offset");
+    uint32_t writtenSize = writeRelativeMethodList(isec, buf);
+    buf += writtenSize;
+  }
+  assert(buf - bufStart == m_size &&
+         "Written size does not match expected section size");
+}
+
+// Check if an InputSection is a method list. To do this we scan the
+// InputSection for any symbols who's names match the patterns we expect clang
+// to generate for method lists.
+bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
+  const char *symPrefixes[] = {objc::symbol_names::classMethods,
+                               objc::symbol_names::instanceMethods,
+                               objc::symbol_names::categoryInstanceMethods,
+                               objc::symbol_names::categoryClassMethods};
+  if (!isec)
+    return false;
+  for (const Symbol *sym : isec->symbols) {
+    auto *def = dyn_cast_or_null<Defined>(sym);
+    if (!def)
+      continue;
+    for (const char *prefix : symPrefixes) {
+      if (def->getName().starts_with(prefix)) {
+        assert(def->size == isec->data.size() &&
+               "Invalid ObjC method list symbol size: expected symbol size to "
+               "match isec size");
+        assert(def->value == 0 &&
+               "Offset of ObjC method list symbol must be 0");
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+// Encode a single relative offset value. The input is the data/symbol at
+// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
+// 'createSelRef' indicates that we should not directly use the specified
+// symbol, but instead get the selRef for the symbol and use that instead.
+void ObjCMethListSection::writeRelativeOffsetForIsec(
+    const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
+    uint32_t &outSecOff, bool useSelRef) const {
+  const Reloc *reloc = isec->getRelocAt(inSecOff);
+  assert(reloc && "Relocation expected at __objc_methlist Offset");
+  auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  assert(def && "Expected all syms in __objc_methlist to be defined");
+  uint32_t symVA = def->getVA();
+
+  if (useSelRef) {
+    auto *cisec = cast<CStringInputSection>(def->isec);
+    auto methname = cisec->getStringRefAtOffset(def->value);
+    ConcatInputSection *selRef = in.objcSelRefs->getSelRef(methname);
+    assert(selRef && "Expected all selector names to already be already be "
+                     "present in __objc_selrefs");
+    symVA = selRef->getVA();
+    assert(selRef->data.size() == sizeof(target->wordSize) &&
+           "Expected one selref per ConcatInputSection");
+  }
+
+  uint32_t currentVA = isec->getVA() + outSecOff;
+  uint32_t delta = symVA - currentVA;
+  write32le(buf + outSecOff, delta);
+
+  inSecOff += target->wordSize;
+  outSecOff += sizeof(uint32_t);
+}
+
+// Write a relative method list to buf, return the size of the written
+// information
+uint32_t
+ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
+                                             uint8_t *buf) const {
+  // Copy over the header, and add the "this is a relative method list" magic
+  // value flag
+  uint32_t structSizeAndFlags = 0, structCount = 0;
+  readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
+  structSizeAndFlags |= m_relMethodHeaderFlag;
+  writeMethodListHeader(buf, structSizeAndFlags, structCount);
+
+  assert(m_methodListHeaderSize +
+                 (structCount * m_pointersPerStruct * target->wordSize) ==
+             isec->data.size() &&
+         "Invalid computed ObjC method list size");
+
+  uint32_t inSecOff = m_methodListHeaderSize;
+  uint32_t outSecOff = m_methodListHeaderSize;
+
+  // Go through the method list and encode input absolute pointers as relative
+  // offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
+  // outSecOff
+  for (uint32_t i = 0; i < structCount; i++) {
+    // Write the name of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, true);
+    // Write the type of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
+    // Write reference to the selector of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
+  }
+
+  // Expecting to have read all the data in the isec
+  assert(inSecOff == isec->data.size() &&
+         "Invalid actual ObjC method list size");
+  assert(
+      outSecOff == methodListSizeToRelativeMethodListSize(inSecOff) &&
+      "Mismatch between input & output size when writing relative method list");
+  return outSecOff;
+}
+
+// Given the size of an ObjC method list InputSection, return the size of the
+// method list when encoded in relative offsets format. We can do this without
+// decoding the actual data, as it can be directly infered from the size of the
+// isec.
+uint32_t
+ObjCMethListSection::methodListSizeToRelativeMethodListSize(uint32_t iSecSize) {
+  uint32_t oldPointersSize = iSecSize - m_methodListHeaderSize;
+  uint32_t pointerCount = oldPointersSize / target->wordSize;
+  assert(((pointerCount % m_pointersPerStruct) == 0) &&
+         "__objc_methlist expects method lists to have multiple-of-3 pointers");
+
+  constexpr uint32_t sizeOfRelativeOffset = sizeof(uint32_t);
+  uint32_t newPointersSize = pointerCount * sizeOfRelativeOffset;
+  uint32_t newTotalSize = m_methodListHeaderSize + newPointersSize;
+
+  assert((newTotalSize <= iSecSize) &&
+         "Expected relative method list size to be smaller or equal than "
+         "original size");
+  return newTotalSize;
+}
+
+// Read a method list header from buf
+void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
+                                               uint32_t &structSizeAndFlags,
+                                               uint32_t &structCount) {
+  structSizeAndFlags = read32le(buf);
+  structCount = read32le(buf + sizeof(uint32_t));
+}
+
+// Write a method list header to buf
+void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
+                                                uint32_t structSizeAndFlags,
+                                                uint32_t structCount) {
+  write32le(buf, structSizeAndFlags);
+  write32le(buf + sizeof(structSizeAndFlags), structCount);
+}
+
 void macho::createSyntheticSymbols() {
   auto addHeaderSymbol = [](const char *name) {
     symtab->addSynthetic(name, in.header->isec, /*value=*/0,
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 6d85f0aea8e04c..5d880513397423 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -692,6 +692,53 @@ class InitOffsetsSection final : public SyntheticSection {
   std::vector<ConcatInputSection *> sections;
 };
 
+// This SyntheticSection is for the __objc_methlist section, which contains
+// relative method lists if the -objc_relative_method_lists option is enabled.
+class ObjCMethListSection final : public SyntheticSection {
+public:
+  ObjCMethListSection();
+
+  static bool isMethodList(const ConcatInputSection *isec);
+  void addInput(ConcatInputSection *isec) { inputs.push_back(isec); }
+  std::vector<ConcatInputSection *> getInputs() { return inputs; }
+
+  void setUp();
+  void finalize() override;
+  bool isNeeded() const override { return !inputs.empty(); }
+  uint64_t getSize() const override { return m_size; }
+  void writeTo(uint8_t *bufStart) const override;
+
+private:
+  static void readMethodListHeader(const uint8_t *buf,
+                                   uint32_t &structSizeAndFlags,
+                                   uint32_t &structCount);
+  static void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
+                                    uint32_t structCount);
+  static uint32_t methodListSizeToRelativeMethodListSize(uint32_t iSecSize);
+  void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
+                                  uint32_t &inSecOff, uint32_t &outSecOff,
+                                  bool useSelRef) const;
+  uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
+                                   uint8_t *buf) const;
+
+  static constexpr uint32_t m_methodListHeaderSize =
+      /*structSizeAndFlags*/ sizeof(uint32_t) +
+      /*structCount*/ sizeof(uint32_t);
+  // Relative method lists are supported only for 3-pointer method lists
+  static constexpr uint32_t m_pointersPerStruct = 3;
+  // The runtime identifies relative method lists via this magic value
+  static constexpr uint32_t m_relMethodHeaderFlag = 0x80000000;
+  // In the method list header, the first 2 bytes are the size of struct
+  static constexpr uint32_t m_structSizeMask = 0xFFFF;
+  // Relative method lists have 4 byte alignment as all data in the InputSection
+  // is 4 byte
+  static constexpr uint32_t m_align = 4;
+
+  // The output size of the __objc_methlist section, computed during finalize()
+  uint32_t m_size = 0;
+  std::vector<ConcatInputSection *> inputs;
+};
+
 // Chained fixups are a replacement for classic dyld opcodes. In this format,
 // most of the metadata necessary for binding symbols and rebasing addresses is
 // stored directly in the memory location that will have the fixup applied.
@@ -819,6 +866,7 @@ struct InStruct {
   ObjCImageInfoSection *objCImageInfo = nullptr;
   ConcatInputSection *imageLoaderCache = nullptr;
   InitOffsetsSection *initOffsets = nullptr;
+  ObjCMethListSection *objcMet...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/86231


More information about the llvm-commits mailing list