[lld] [lld-macho] Implement support for ObjC relative method lists (PR #86231)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 25 10:13:57 PDT 2024


================
@@ -1974,6 +1975,234 @@ void InitOffsetsSection::setUp() {
   }
 }
 
+ObjCMethListSection::ObjCMethListSection()
+    : SyntheticSection(segment_names::text, section_names::objcMethList) {
+  flags = S_ATTR_NO_DEAD_STRIP;
+  align = m_align;
+}
+
+// Go through all input method lists and ensure that we have selrefs for all
+// their method names. The selrefs will be needed later by ::writeTo. We need to
+// create them early on here to ensure they are processed correctly by the lld
+// pipeline.
+void ObjCMethListSection::setUp() {
+  for (const ConcatInputSection *isec : inputs) {
+    uint32_t structSizeAndFlags = 0, structCount = 0;
+    readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
+    uint32_t structSize = structSizeAndFlags & m_structSizeMask;
+
+    // Method name is immediately after header
+    uint32_t methodNameOff = m_methodListHeaderSize;
+
+    // Loop through all methods, and ensure a selref for each of them exists.
+    while (methodNameOff < isec->data.size()) {
+      const Reloc *reloc = isec->getRelocAt(methodNameOff);
+      assert(reloc && "Relocation expected at method list name slot");
+      auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+      assert(def && "Expected valid Defined at method list name slot");
+      auto *cisec = cast<CStringInputSection>(def->isec);
+      assert(cisec && "Expected method name to be in a CStringInputSection");
+      auto methname = cisec->getStringRefAtOffset(def->value);
+      if (!in.objcSelRefs->getSelRef(methname))
+        in.objcSelRefs->makeSelRef(methname);
+
+      // Jump to method name offset in next struct
+      methodNameOff += structSize;
+    }
+  }
+}
+
+// Calculate section size and final offsets for where InputSection's need to be
+// written.
+void ObjCMethListSection::finalize() {
+  // m_size will be the total size of the __objc_methlist section
+  m_size = 0;
+  for (ConcatInputSection *isec : inputs) {
+    // We can also use m_size as write offset for isec
+    assert(m_size == alignToPowerOf2(m_size, m_align) &&
+           "expected __objc_methlist to be aligned by default with the "
+           "required section alignment");
+    isec->outSecOff = m_size;
+
+    isec->isFinal = true;
+    uint32_t relativeListSize =
+        methodListSizeToRelativeMethodListSize(isec->data.size());
+    m_size += relativeListSize;
+
+    // If encoding the method list in relative offset format shrinks the size,
+    // then we also need to adjust symbol sizes to match the new size. Note that
+    // on 32bit platforms the size of the method list will remain the same when
+    // encoded in relative offset format.
+    if (relativeListSize != isec->data.size()) {
+      for (Symbol *sym : isec->symbols) {
+        assert(isa<Defined>(sym) &&
+               "Unexpected undefined symbol in ObjC method list");
+        auto *def = cast<Defined>(sym);
+        // There can be 0-size symbols, check if this is the case and ignore
+        // them.
+        if (def->size) {
+          assert(
+              def->size == isec->data.size() &&
+              "Invalid ObjC method list symbol size: expected symbol size to "
+              "match isec size");
+          def->size = relativeListSize;
+        }
+      }
+    }
+  }
+}
+
+void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
+  uint8_t *buf = bufStart;
+  for (const ConcatInputSection *isec : inputs) {
+    assert(buf - bufStart == long(isec->outSecOff) &&
+           "Writing at unexpected offset");
+    uint32_t writtenSize = writeRelativeMethodList(isec, buf);
+    buf += writtenSize;
+  }
+  assert(buf - bufStart == m_size &&
+         "Written size does not match expected section size");
+}
+
+// Check if an InputSection is a method list. To do this we scan the
+// InputSection for any symbols who's names match the patterns we expect clang
+// to generate for method lists.
+bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
+  const char *symPrefixes[] = {objc::symbol_names::classMethods,
+                               objc::symbol_names::instanceMethods,
+                               objc::symbol_names::categoryInstanceMethods,
+                               objc::symbol_names::categoryClassMethods};
+  if (!isec)
+    return false;
+  for (const Symbol *sym : isec->symbols) {
+    auto *def = dyn_cast_or_null<Defined>(sym);
+    if (!def)
+      continue;
+    for (const char *prefix : symPrefixes) {
+      if (def->getName().starts_with(prefix)) {
+        assert(def->size == isec->data.size() &&
+               "Invalid ObjC method list symbol size: expected symbol size to "
+               "match isec size");
+        assert(def->value == 0 &&
+               "Offset of ObjC method list symbol must be 0");
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+// Encode a single relative offset value. The input is the data/symbol at
+// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
+// 'createSelRef' indicates that we should not directly use the specified
+// symbol, but instead get the selRef for the symbol and use that instead.
+void ObjCMethListSection::writeRelativeOffsetForIsec(
+    const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
+    uint32_t &outSecOff, bool useSelRef) const {
+  const Reloc *reloc = isec->getRelocAt(inSecOff);
+  assert(reloc && "Relocation expected at __objc_methlist Offset");
+  auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  assert(def && "Expected all syms in __objc_methlist to be defined");
+  uint32_t symVA = def->getVA();
+
+  if (useSelRef) {
+    auto *cisec = cast<CStringInputSection>(def->isec);
+    auto methname = cisec->getStringRefAtOffset(def->value);
+    ConcatInputSection *selRef = in.objcSelRefs->getSelRef(methname);
+    assert(selRef && "Expected all selector names to already be already be "
+                     "present in __objc_selrefs");
+    symVA = selRef->getVA();
+    assert(selRef->data.size() == sizeof(target->wordSize) &&
+           "Expected one selref per ConcatInputSection");
+  }
+
+  uint32_t currentVA = isec->getVA() + outSecOff;
+  uint32_t delta = symVA - currentVA;
+  write32le(buf + outSecOff, delta);
+
+  inSecOff += target->wordSize;
+  outSecOff += sizeof(uint32_t);
+}
+
+// Write a relative method list to buf, return the size of the written
+// information
+uint32_t
+ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
+                                             uint8_t *buf) const {
+  // Copy over the header, and add the "this is a relative method list" magic
+  // value flag
+  uint32_t structSizeAndFlags = 0, structCount = 0;
+  readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
+  structSizeAndFlags |= m_relMethodHeaderFlag;
+  writeMethodListHeader(buf, structSizeAndFlags, structCount);
+
+  assert(m_methodListHeaderSize +
+                 (structCount * m_pointersPerStruct * target->wordSize) ==
+             isec->data.size() &&
+         "Invalid computed ObjC method list size");
+
+  uint32_t inSecOff = m_methodListHeaderSize;
+  uint32_t outSecOff = m_methodListHeaderSize;
+
+  // Go through the method list and encode input absolute pointers as relative
+  // offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
+  // outSecOff
+  for (uint32_t i = 0; i < structCount; i++) {
+    // Write the name of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, true);
+    // Write the type of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
+    // Write reference to the selector of the method
+    writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
+  }
+
+  // Expecting to have read all the data in the isec
+  assert(inSecOff == isec->data.size() &&
+         "Invalid actual ObjC method list size");
+  assert(
+      outSecOff == methodListSizeToRelativeMethodListSize(inSecOff) &&
+      "Mismatch between input & output size when writing relative method list");
+  return outSecOff;
+}
+
+// Given the size of an ObjC method list InputSection, return the size of the
+// method list when encoded in relative offsets format. We can do this without
+// decoding the actual data, as it can be directly infered from the size of the
----------------
kyulee-com wrote:

infered -> inferred

https://github.com/llvm/llvm-project/pull/86231


More information about the llvm-commits mailing list