[lld] [lld-macho] Implement ObjC category merging (-objc_category_merging) (PR #82928)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 29 02:01:59 PST 2024


================
@@ -320,3 +342,1007 @@ void objc::checkCategories() {
       }
   }
 }
+
+namespace {
+
+class ObjcCategoryMerger {
+  // Information about an input category
+  struct InfoInputCategory {
+    ConcatInputSection *catBodyIsec;
+    ConcatInputSection *catListIsec;
+    uint32_t offCatListIsec = 0;
+
+    bool wasMerged = false;
+  };
+
+  // To write new (merged) categories or classes, we will try make limited
+  // assumptions about the alignment and the sections the various class/category
+  // info are stored in and . So we'll just reuse the same sections and
+  // alignment as already used in existing (input) categories. To do this we
+  // have InfoCategoryWriter which contains the various sections that the
+  // generated categories will be written to.
+  template <typename T> struct InfroWriteSection {
+    bool valid = false; // Data has been successfully collected from input
+    uint32_t align = 0;
+    const Section *inputSection;
+    Reloc relocTemplate;
+    T *outputSection;
+  };
+
+  struct InfoCategoryWriter {
+    InfroWriteSection<ConcatOutputSection> catListInfo;
+    InfroWriteSection<CStringSection> catNameInfo;
+    InfroWriteSection<ConcatOutputSection> catBodyInfo;
+    InfroWriteSection<ConcatOutputSection> catPtrListInfo;
+  };
+
+  // Information about a pointer list in the original categories (method lists,
+  // protocol lists, etc)
+  struct PointerListInfo {
+    PointerListInfo(const char *pszSymNamePrefix)
+        : namePrefix(pszSymNamePrefix) {}
+    const char *namePrefix;
+
+    uint32_t structSize = 0;
+    uint32_t structCount = 0;
+
+    std::vector<Symbol *> allPtrs;
+  };
+
+  // Full information about all the categories that are extending a class. This
+  // will have all the additional methods, protocols, proprieties that are
+  // contained in all the categories that extend a particular class.
+  struct ClassExtensionInfo {
+    // Merged names of containers. Ex: base|firstCategory|secondCategory|...
+    std::string mergedContainerName;
+    std::string baseClassName;
+    Symbol *baseClass = nullptr;
+    // In case we generate new data, mark the new data as belonging to this file
+    ObjFile *objFileForMergeData = nullptr;
+
+    PointerListInfo instanceMethods = "__OBJC_$_CATEGORY_INSTANCE_METHODS_";
+    PointerListInfo classMethods = "__OBJC_$_CATEGORY_CLASS_METHODS_";
+    PointerListInfo protocols = "__OBJC_CATEGORY_PROTOCOLS_$_";
+    PointerListInfo instanceProps = "__OBJC_$_PROP_LIST_";
+    PointerListInfo classProps = "__OBJC_$_CLASS_PROP_LIST_";
+  };
+
+public:
+  ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
+  bool doMerge();
+
+private:
+  // This returns bool and always false for easy 'return false;' statements
+  bool registerError(const char *msg);
+
+  bool collectAndValidateCategoriesData();
+  bool
+  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+  bool eraseMergedCategories();
+
+  bool generateCatListForNonErasedCategories(
+      std::map<ConcatInputSection *, std::set<uint64_t>>
+          catListToErasedOffsets);
+  template <typename T>
+  bool collectSectionWriteInfoFromIsec(InputSection *isec,
+                                       InfroWriteSection<T> &catWriteInfo);
+  bool collectCategoryWriterInfoFromCategory(InfoInputCategory &catInfo);
+  bool parseCatInfoToExtInfo(InfoInputCategory &catInfo,
+                             ClassExtensionInfo &extInfo);
+
+  bool tryParseProtocolListInfo(ConcatInputSection *isec,
+                                uint32_t symbolsPerStruct,
+                                PointerListInfo &ptrList);
+
+  bool parsePointerListInfo(ConcatInputSection *isec, uint32_t secOffset,
+                            uint32_t symbolsPerStruct,
+                            PointerListInfo &ptrList);
+
+  bool emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
+                              ClassExtensionInfo &extInfo,
+                              PointerListInfo &ptrList);
+
+  bool emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+                               ClassExtensionInfo &extInfo,
+                               PointerListInfo &ptrList);
+
+  bool emitCategory(ClassExtensionInfo &extInfo, Defined *&catBodySym);
+  bool emitCatListEntrySec(std::string &forCateogryName,
+                           std::string &forBaseClassName, ObjFile *objFile,
+                           Defined *&catListSym);
+  bool emitCategoryBody(std::string &name, Defined *nameSym,
+                        Symbol *baseClassSym, std::string &baseClassName,
+                        ObjFile *objFile, Defined *&catBodySym);
+  bool emitCategoryName(std::string &name, ObjFile *objFile,
+                        Defined *&catNameSym);
+  bool createSymbolReference(Defined *refFrom, Symbol *refTo, uint32_t offset,
+                             Reloc &relocTemplate);
+  bool tryGetSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                Symbol *&sym);
+  bool tryGetDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                 Defined *&defined);
+  bool tryEraseDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                   bool stringOnly = false);
+
+  CategoryLayout catLayout;
+  ClassLayout classLayout;
+  ROClassLayout roClassLayout;
+  ListHeaderLayout listHeaderLayout;
+  MethodLayout methodLayout;
+  ProtocolListHeaderLayout protocolListHeaderLayout;
+
+  InfoCategoryWriter infoCategoryWriter;
+  std::vector<ConcatInputSection *> &allInputSections;
+  // Map of base class Symbol to list of InfoInputCategory's for it
+  std::map<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+
+  // Normally, the binary data comes from the input files, but since we're
+  // generating binary data ourselves, we use the below arrays to store it in.
+  // Need this to be 'static' so the data survives past the ObjcCategoryMerger
+  // object, as the data will be read by the Writer when the final binary is
+  // generated.
+  static SmallVector<SmallString<0>> generatedNames;
+  static SmallVector<SmallVector<uint8_t>> generatedSectionData;
+};
+
+SmallVector<SmallString<0>> ObjcCategoryMerger::generatedNames;
+SmallVector<SmallVector<uint8_t>> ObjcCategoryMerger::generatedSectionData;
+
+ObjcCategoryMerger::ObjcCategoryMerger(
+    std::vector<ConcatInputSection *> &_allInputSections)
+    : catLayout(target->wordSize), classLayout(target->wordSize),
+      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
+      methodLayout(target->wordSize),
+      protocolListHeaderLayout(target->wordSize),
+      allInputSections(_allInputSections) {}
+
+bool ObjcCategoryMerger::registerError(const char *msg) {
+  std::string err = "ObjC category merging error[-merge-objc-categories]: ";
+  err += msg;
+  error(err);
+  return false; // Always return false for easy 'return registerError()' syntax.
+}
+
+// This is a template so that it can be used both for CStringSection and
+// ConcatOutputSection
+template <typename T>
+bool ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
+    InputSection *isec, InfroWriteSection<T> &catWriteInfo) {
+  if (catWriteInfo.valid)
+    return true;
+
+  catWriteInfo.inputSection = &isec->section;
+  catWriteInfo.align = isec->align;
+  catWriteInfo.outputSection = dyn_cast_or_null<T>(isec->parent);
+
+  if (isec->relocs.size())
+    catWriteInfo.relocTemplate = isec->relocs[0];
+
+  if (!catWriteInfo.outputSection) {
+    std::string message =
+        "Unexpected output section type for" + isec->getName().str();
+    return registerError(message.c_str());
+  }
+
+  catWriteInfo.valid = true;
+
+  return true;
+}
+
+bool ObjcCategoryMerger::tryGetSymbolAtIsecOffset(ConcatInputSection *isec,
+                                                  uint32_t offset,
+                                                  Symbol *&sym) {
+  const Reloc *reloc = isec->getRelocAt(offset);
+
+  if (!reloc)
+    return false;
+
+  sym = reloc->referent.get<Symbol *>();
+  return sym != nullptr;
+}
+
+bool ObjcCategoryMerger::tryGetDefinedAtIsecOffset(ConcatInputSection *isec,
+                                                   uint32_t offset,
+                                                   Defined *&defined) {
+  Symbol *sym;
+  if (!tryGetSymbolAtIsecOffset(isec, offset, sym))
+    return false;
+
+  defined = dyn_cast_or_null<Defined>(sym);
+  return defined != nullptr;
+}
+
+// Given an ConcatInputSection and an offset, if there is a symbol(Defined) at
+// that offset, then erase the symbol (mark it not live) from the final output.
+// Used for easely erasing already merged strings, method lists, etc ...
+bool ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(ConcatInputSection *isec,
+                                                     uint32_t offset,
+                                                     bool stringOnly) {
+  const Reloc *reloc = isec->getRelocAt(offset);
+
+  if (!reloc)
+    return false;
+
+  Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+
+  if (!sym)
+    return false;
+
+  auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec);
+  if (!stringOnly && cisec) {
+    cisec->linkerOptimizeReason = LinkerOptReason::CategoryMerging;
+    return true;
+  }
+
+  if (auto *cisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+    uint32_t totalOffset = sym->value + reloc->addend;
+    StringPiece &piece = cisec->getStringPiece(totalOffset);
+    piece.linkerOptimizeReason = LinkerOptReason::CategoryMerging;
+    return true;
+  }
+
+  return false;
+}
+
+bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
+    InfoInputCategory &catInfo) {
+
+  if (!collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+          catInfo.catListIsec, infoCategoryWriter.catListInfo))
+    return false;
+  if (!collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+          catInfo.catBodyIsec, infoCategoryWriter.catBodyInfo))
+    return false;
+
+  if (!infoCategoryWriter.catNameInfo.valid) {
+    const Reloc *catNameReloc =
+        catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
+
+    if (!catNameReloc)
+      return registerError("Category does not have a reloc at nameOffset");
+
+    lld::macho::Defined *catDefSym =
+        dyn_cast_or_null<Defined>(catNameReloc->referent.dyn_cast<Symbol *>());
+    if (!catDefSym)
+      return registerError(
+          "Reloc of category name is not a valid Defined symbol");
+
+    if (!collectSectionWriteInfoFromIsec<CStringSection>(
+            catDefSym->isec, infoCategoryWriter.catNameInfo))
+      return false;
+  }
+
+  // Collect writer info from all the category lists (we're assuming they all
+  // would provide the same info)
+  if (!infoCategoryWriter.catPtrListInfo.valid) {
+    for (uint32_t off = catLayout.instanceMethodsOffset;
+         off <= catLayout.classPropsOffset; off += target->wordSize) {
+      Defined *ptrList;
+      if (tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off, ptrList)) {
+        if (!collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+                ptrList->isec, infoCategoryWriter.catPtrListInfo))
+          return false;
+        break;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Parse a protocol list that might be linked to at a ConcatInputSection given
+// offset. The format of the protocol list is different than other lists (prop
+// lists, method lists) so we need to parse it differently
+bool ObjcCategoryMerger::tryParseProtocolListInfo(ConcatInputSection *isec,
+                                                  uint32_t secOffset,
+                                                  PointerListInfo &ptrList) {
+  if (!isec || (secOffset + target->wordSize > isec->data.size()))
+    return registerError(
+        "Tried to read pointer list beyond protocol section end");
+
+  const Reloc *reloc = isec->getRelocAt(secOffset);
+  if (!reloc)
+    return true; // List is null, return true because no m_error
+
+  auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  if (!ptrListSym)
+    return registerError("Protocol list reloc does not have a valid Defined");
+
+  // Theoretically protocol count can be either 32b or 64b, but reading the
+  // first 32b is good enough
+  uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
+      ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+
+  ptrList.structCount += protocolCount;
+  ptrList.structSize = target->wordSize;
+
+  uint32_t expectedListSize =
+      (protocolCount * target->wordSize) +
+      /*header(count)*/ protocolListHeaderLayout.totalSize +
+      /*extra null value*/ target->wordSize;
+  if (expectedListSize != ptrListSym->isec->data.size())
+    return registerError("Protocol list does not match expected size");
+
+  uint32_t off = protocolListHeaderLayout.totalSize;
+  for (uint32_t inx = 0; inx < protocolCount; inx++) {
+    const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+    if (!reloc)
+      return registerError("No reloc found at protocol list offset");
+
+    auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+    if (!listSym)
+      return registerError("Protocol list reloc does not have a valid Defined");
+
+    ptrList.allPtrs.push_back(listSym);
+    off += target->wordSize;
+  }
+
+  return true;
+}
+
+// Parse a pointer list that might be linked to at a ConcatInputSection given
+// offset. This can be used for instance methods, class methods, instance props
+// and class props since they have the same format.
+bool ObjcCategoryMerger::parsePointerListInfo(ConcatInputSection *isec,
+                                              uint32_t secOffset,
+                                              uint32_t symbolsPerStruct,
+                                              PointerListInfo &ptrList) {
+  assert(symbolsPerStruct == 2 || symbolsPerStruct == 3);
+  if (!isec || (secOffset + target->wordSize > isec->data.size()))
+    return registerError("Tried to read pointer list beyond section end");
+
+  const Reloc *reloc = isec->getRelocAt(secOffset);
+  if (!reloc)
+    return true; // No reloc found, nothing to parse, so return success
+
+  auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  if (!ptrListSym)
+    return registerError("Reloc does not have a valid Defined");
+
+  uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
+      ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+  uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
+      ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
+
+  assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
+
+  ptrList.structCount += thisStructCount;
+  ptrList.structSize = thisStructSize;
+
+  uint32_t expectedListSize =
+      listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
+
+  if (expectedListSize != ptrListSym->isec->data.size())
+    return registerError("Pointer list does not match expected size");
+
+  for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
+       off += target->wordSize) {
+    const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+    if (!reloc)
+      return registerError("No reloc found at pointer list offset");
+
+    auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+    if (!listSym)
+      return registerError("Reloc does not have a valid Defined");
+
+    ptrList.allPtrs.push_back(listSym);
+  }
+
+  return true;
+}
+
+// Here we parse all the information of an input category (catInfo) and
+// append-store the parsed info into the strucutre which will contain all the
+// information about how a class is extended (extInfo)
+bool ObjcCategoryMerger::parseCatInfoToExtInfo(InfoInputCategory &catInfo,
+                                               ClassExtensionInfo &extInfo) {
+  const Reloc *catNameReloc =
+      catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
+
+  //// Parse name ///////////////////////////////////////////////////////////
+  if (!catNameReloc)
+    return registerError("Category does not have a reloc at 'nameOffset'");
+
+  if (!extInfo.mergedContainerName.empty())
+    extInfo.mergedContainerName += "|";
+
+  if (!extInfo.objFileForMergeData)
+    extInfo.objFileForMergeData =
+        dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
+
+  StringRef catName = getReferentString(*catNameReloc);
+  extInfo.mergedContainerName += catName.str();
+
+  //// Parse base class /////////////////////////////////////////////////////
+  const Reloc *klassReloc =
+      catInfo.catBodyIsec->getRelocAt(catLayout.klassOffset);
+
+  if (!klassReloc)
+    return registerError("Category does not have a reloc at 'klassOffset'");
+
+  Symbol *classSym = klassReloc->referent.get<Symbol *>();
+
+  if (extInfo.baseClass && extInfo.baseClass != classSym)
+    return registerError("Trying to parse category info into container with "
+                         "different base class");
+
+  extInfo.baseClass = classSym;
+
+  if (extInfo.baseClassName.empty()) {
+    llvm::StringRef classPrefix("_OBJC_CLASS_$_");
+    if (!classSym->getName().starts_with(classPrefix))
+      return registerError(
+          "Base class symbol does not start with '_OBJC_CLASS_$_'");
+
+    extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
+  }
+
+  if (!parsePointerListInfo(catInfo.catBodyIsec,
+                            catLayout.instanceMethodsOffset,
+                            /*symbolsPerStruct=*/3, extInfo.instanceMethods))
+    return false;
+
+  if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
+                            /*symbolsPerStruct=*/3, extInfo.classMethods))
+    return false;
+
+  if (!tryParseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
+                                extInfo.protocols))
+    return false;
+
+  if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
+                            /*symbolsPerStruct=*/2, extInfo.instanceProps))
+    return false;
+
+  if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
+                            /*symbolsPerStruct=*/2, extInfo.classProps))
+    return false;
+
+  return true;
+}
+
+// Generate a protocol list (including header) and link it into the parent at
+// the specified offset.
+bool ObjcCategoryMerger::emitAndLinkProtocolList(Defined *parentSym,
+                                                 uint32_t linkAtOffset,
+                                                 ClassExtensionInfo &extInfo,
+                                                 PointerListInfo &ptrList) {
+  if (ptrList.allPtrs.empty())
+    return true;
+
+  assert(ptrList.allPtrs.size() == ptrList.structCount);
+
+  uint32_t bodySize = (ptrList.structCount * target->wordSize) +
+                      /*header(count)*/ protocolListHeaderLayout.totalSize +
+                      /*extra null value*/ target->wordSize;
+  generatedSectionData.push_back(SmallVector<uint8_t>(bodySize, 0));
+  llvm::ArrayRef<uint8_t> bodyData = generatedSectionData.back();
+
+  // This theoretically can be either 32b or 64b, but writing just the first 32b
+  // is good enough
+  const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
+      bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
+
+  *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
+
+  ConcatInputSection *listSec = make<ConcatInputSection>(
+      *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
+      infoCategoryWriter.catPtrListInfo.align);
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+  listSec->live = true;
+  allInputSections.push_back(listSec);
+
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+
+  generatedNames.push_back(StringRef(ptrList.namePrefix));
+  auto &symName = generatedNames.back();
+  symName += extInfo.baseClassName + "_$_(" + extInfo.mergedContainerName + ")";
+
+  Defined *ptrListSym = make<Defined>(
+      symName.c_str(), /*file=*/parentSym->getObjectFile(), listSec,
+      /*value=*/0, bodyData.size(),
+      /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+      /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
+      /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
+
+  ptrListSym->used = true;
+  parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+
+  if (!createSymbolReference(parentSym, ptrListSym, linkAtOffset,
+                             infoCategoryWriter.catBodyInfo.relocTemplate))
+    return false;
+
+  uint32_t offset = protocolListHeaderLayout.totalSize;
+  for (Symbol *symbol : ptrList.allPtrs) {
+    if (!createSymbolReference(ptrListSym, symbol, offset,
+                               infoCategoryWriter.catPtrListInfo.relocTemplate))
+      return false;
+
+    offset += target->wordSize;
+  }
+
+  return true;
+}
+
+// Generate a pointer list (including header) and link it into the parent at the
+// specified offset. This is used for instance and class methods and
+// proprieties.
+bool ObjcCategoryMerger::emitAndLinkPointerList(Defined *parentSym,
+                                                uint32_t linkAtOffset,
+                                                ClassExtensionInfo &extInfo,
+                                                PointerListInfo &ptrList) {
+  if (ptrList.allPtrs.empty())
+    return true;
+
+  assert(ptrList.allPtrs.size() * target->wordSize ==
+         ptrList.structCount * ptrList.structSize);
+
+  // Generate body
+  uint32_t bodySize =
+      listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
+  generatedSectionData.push_back(SmallVector<uint8_t>(bodySize, 0));
+  llvm::ArrayRef<uint8_t> bodyData = generatedSectionData.back();
+
+  const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
+      bodyData.data() + listHeaderLayout.structSizeOffset);
+  const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
+      bodyData.data() + listHeaderLayout.structCountOffset);
+
+  *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
+  *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
+
+  ConcatInputSection *listSec = make<ConcatInputSection>(
+      *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
+      infoCategoryWriter.catPtrListInfo.align);
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+  listSec->live = true;
+  allInputSections.push_back(listSec);
+
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+
+  generatedNames.push_back(StringRef(ptrList.namePrefix));
+  auto &symName = generatedNames.back();
+  symName += extInfo.baseClassName + "_$_" + extInfo.mergedContainerName;
+
+  Defined *ptrListSym = make<Defined>(
+      symName.c_str(), /*file=*/parentSym->getObjectFile(), listSec,
+      /*value=*/0, bodyData.size(),
+      /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+      /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
+      /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
+
+  ptrListSym->used = true;
+  parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+
+  if (!createSymbolReference(parentSym, ptrListSym, linkAtOffset,
+                             infoCategoryWriter.catBodyInfo.relocTemplate))
+    return false;
+
+  uint32_t offset = listHeaderLayout.totalSize;
+  for (Symbol *symbol : ptrList.allPtrs) {
+    if (!createSymbolReference(ptrListSym, symbol, offset,
+                               infoCategoryWriter.catPtrListInfo.relocTemplate))
+      return false;
+
+    offset += target->wordSize;
+  }
+
+  return true;
+}
+
+// This method creates an __objc_catlist ConcatInputSection with a single slot
+bool ObjcCategoryMerger::emitCatListEntrySec(std::string &forCateogryName,
+                                             std::string &forBaseClassName,
+                                             ObjFile *objFile,
+                                             Defined *&catListSym) {
+  uint32_t sectionSize = target->wordSize;
+  generatedSectionData.push_back(SmallVector<uint8_t>(sectionSize, 0));
+  llvm::ArrayRef<uint8_t> bodyData = generatedSectionData.back();
+
+  ConcatInputSection *newCatList =
+      make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
+                               bodyData, infoCategoryWriter.catListInfo.align);
+  newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
+  newCatList->live = true;
+  allInputSections.push_back(newCatList);
+
+  newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
+
+  SmallString<0> catSymName;
+  catSymName += "<__objc_catlist slot for merged category ";
+  catSymName += forBaseClassName + "(" + forCateogryName + ")>";
+  generatedNames.push_back(StringRef(catSymName));
+
+  catListSym = make<Defined>(
+      StringRef(generatedNames.back()), /*file=*/objFile, newCatList,
+      /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
+      /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  catListSym->used = true;
+  objFile->symbols.push_back(catListSym);
+  return true;
+}
+
+// Here we generate the main category body and just the body and link the name
+// and base class into it. We don't link any other info like the protocol and
+// class/instance methods/props.
+bool ObjcCategoryMerger::emitCategoryBody(std::string &name, Defined *nameSym,
+                                          Symbol *baseClassSym,
+                                          std::string &baseClassName,
+                                          ObjFile *objFile,
+                                          Defined *&catBodySym) {
+  generatedSectionData.push_back(SmallVector<uint8_t>(catLayout.totalSize, 0));
+  llvm::ArrayRef<uint8_t> bodyData = generatedSectionData.back();
+
+  uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
+                                   catLayout.sizeOffset);
+  *ptrSize = catLayout.totalSize;
+
+  ConcatInputSection *newBodySec =
+      make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
+                               bodyData, infoCategoryWriter.catBodyInfo.align);
+  newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
+  newBodySec->live = true;
+  allInputSections.push_back(newBodySec);
+
+  newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
+
+  std::string symName =
+      "__OBJC_$_CATEGORY_" + baseClassName + "_$_(" + name + ")";
+  generatedNames.push_back(StringRef(symName));
+  catBodySym = make<Defined>(
+      StringRef(generatedNames.back()), /*file=*/objFile, newBodySec,
+      /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
+      /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  catBodySym->used = true;
+  objFile->symbols.push_back(catBodySym);
+
+  if (!createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
+                             infoCategoryWriter.catBodyInfo.relocTemplate))
+    return false;
+
+  // Create a reloc to the base class (either external or internal)
+  if (!createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
+                             infoCategoryWriter.catBodyInfo.relocTemplate))
+    return false;
+
+  return true;
+}
+
+// This writes the new category name (for the merged category) into the binary
+// and returns the sybmol for it.
+bool ObjcCategoryMerger::emitCategoryName(std::string &name, ObjFile *objFile,
+                                          Defined *&catNamdeSym) {
+  llvm::ArrayRef<uint8_t> inputNameArrData(
+      reinterpret_cast<const uint8_t *>(name.c_str()), name.size() + 1);
+  generatedSectionData.push_back(SmallVector<uint8_t>(inputNameArrData));
+
+  llvm::ArrayRef<uint8_t> nameData = generatedSectionData.back();
+
+  CStringInputSection *newStringSec = make<CStringInputSection>(
+      *infoCategoryWriter.catNameInfo.inputSection, nameData,
+      infoCategoryWriter.catNameInfo.align, true);
+
+  newStringSec->splitIntoPieces();
+  newStringSec->pieces[0].live = true;
+  newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
+
+  catNamdeSym = make<Defined>(
+      "<merged category name>", /*file=*/objFile, newStringSec,
+      /*value=*/0, nameData.size(),
+      /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+      /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
+      /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
+
+  catNamdeSym->used = true;
+  objFile->symbols.push_back(catNamdeSym);
+  return true;
+}
+
+// This method fully creates a new category from the given ClassExtensionInfo.
+// It creates the category body, name and protocol/method/prop lists an links
+// everything together. Then it creates a new __objc_catlist entry and links the
+// category into it. Calling this method will fully generate a category which
+// will be available in the final binary.
+bool ObjcCategoryMerger::emitCategory(ClassExtensionInfo &extInfo,
+                                      Defined *&catBodySym) {
+  Defined *catNameSym = nullptr;
+  if (!emitCategoryName(extInfo.mergedContainerName,
+                        extInfo.objFileForMergeData, catNameSym))
+    return false;
+
+  if (!emitCategoryBody(extInfo.mergedContainerName, catNameSym,
+                        extInfo.baseClass, extInfo.baseClassName,
+                        extInfo.objFileForMergeData, catBodySym))
+    return false;
+
+  Defined *catListSym = nullptr;
+  if (!emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
+                           extInfo.objFileForMergeData, catListSym))
+    return false;
+
+  const uint32_t offsetFirstCat = 0;
+  if (!createSymbolReference(catListSym, catBodySym, offsetFirstCat,
+                             infoCategoryWriter.catListInfo.relocTemplate))
+    return false;
+
+  if (!emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset,
+                              extInfo, extInfo.instanceMethods))
+    return false;
+
+  if (!emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
+                              extInfo.classMethods))
+    return false;
+
+  if (!emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
+                               extInfo.protocols))
+    return false;
+
+  if (!emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset,
+                              extInfo, extInfo.instanceProps))
+    return false;
+
+  if (!emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
+                              extInfo.classProps))
+    return false;
+
+  return true;
+}
+
+// This method merges all the categories (sharing a base class) into a single
+// category.
+bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
+    std::vector<InfoInputCategory> &categories) {
+  assert(categories.size() > 1 && "Expected at least 2 categories");
+
+  ClassExtensionInfo extInfo;
+
+  for (auto &catInfo : categories)
+    if (!parseCatInfoToExtInfo(catInfo, extInfo))
+      return false;
+
+  Defined *newCatDef = nullptr;
+  if (!emitCategory(extInfo, newCatDef))
+    return false;
+
+  return true;
+}
+
+bool ObjcCategoryMerger::createSymbolReference(Defined *refFrom, Symbol *refTo,
+                                               uint32_t offset,
+                                               Reloc &relocTemplate) {
+  Reloc r = relocTemplate;
+  r.offset = offset;
+  r.addend = 0;
+  r.referent = refTo;
+  refFrom->isec->relocs.push_back(r);
+
+  return true;
+}
+
+bool ObjcCategoryMerger::collectAndValidateCategoriesData() {
+  for (InputSection *sec : allInputSections) {
+    if (sec->getName() != section_names::objcCatList)
+      continue;
+    ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
+    if (!catListCisec)
+      return registerError(
+          "__objc_catList InputSection is not a ConcatInputSection");
+
+    for (const Reloc &r : catListCisec->relocs) {
+      auto *sym = cast<Defined>(r.referent.get<Symbol *>());
+      if (!sym || !sym->getName().starts_with("__OBJC_$_CATEGORY_"))
+        continue; // Only support ObjC categories (no swift + @objc)
----------------
alx32 wrote:

__objc_catlist is for ObjC categories. But they can either be defined in pure ObjC or via swift's @objc feature. The swift @objc feature will generate slightly different data format, so we're not handling it for now. It's uncommon enough that it may not be worth handling. If need be, we can handle them in a follow-up PR. No issue with not handling them, they'll just be preserved as received and the system will load them. 

https://github.com/llvm/llvm-project/pull/82928


More information about the llvm-commits mailing list