[lld] [lld-macho] Implement ObjC category merging (-objc_category_merging) (PR #82928)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 28 22:57:59 PST 2024


================
@@ -320,3 +342,1007 @@ void objc::checkCategories() {
       }
   }
 }
+
+namespace {
+
+class ObjcCategoryMerger {
+  // Information about an input category
+  struct InfoInputCategory {
+    ConcatInputSection *catBodyIsec;
+    ConcatInputSection *catListIsec;
+    uint32_t offCatListIsec = 0;
+
+    bool wasMerged = false;
+  };
+
+  // To write new (merged) categories or classes, we will try make limited
+  // assumptions about the alignment and the sections the various class/category
+  // info are stored in and . So we'll just reuse the same sections and
+  // alignment as already used in existing (input) categories. To do this we
+  // have InfoCategoryWriter which contains the various sections that the
+  // generated categories will be written to.
+  template <typename T> struct InfroWriteSection {
+    bool valid = false; // Data has been successfully collected from input
+    uint32_t align = 0;
+    const Section *inputSection;
+    Reloc relocTemplate;
+    T *outputSection;
+  };
+
+  struct InfoCategoryWriter {
+    InfroWriteSection<ConcatOutputSection> catListInfo;
+    InfroWriteSection<CStringSection> catNameInfo;
+    InfroWriteSection<ConcatOutputSection> catBodyInfo;
+    InfroWriteSection<ConcatOutputSection> catPtrListInfo;
+  };
+
+  // Information about a pointer list in the original categories (method lists,
+  // protocol lists, etc)
+  struct PointerListInfo {
+    PointerListInfo(const char *pszSymNamePrefix)
+        : namePrefix(pszSymNamePrefix) {}
+    const char *namePrefix;
+
+    uint32_t structSize = 0;
+    uint32_t structCount = 0;
+
+    std::vector<Symbol *> allPtrs;
+  };
+
+  // Full information about all the categories that are extending a class. This
+  // will have all the additional methods, protocols, proprieties that are
+  // contained in all the categories that extend a particular class.
+  struct ClassExtensionInfo {
+    // Merged names of containers. Ex: base|firstCategory|secondCategory|...
+    std::string mergedContainerName;
+    std::string baseClassName;
+    Symbol *baseClass = nullptr;
+    // In case we generate new data, mark the new data as belonging to this file
+    ObjFile *objFileForMergeData = nullptr;
+
+    PointerListInfo instanceMethods = "__OBJC_$_CATEGORY_INSTANCE_METHODS_";
+    PointerListInfo classMethods = "__OBJC_$_CATEGORY_CLASS_METHODS_";
+    PointerListInfo protocols = "__OBJC_CATEGORY_PROTOCOLS_$_";
+    PointerListInfo instanceProps = "__OBJC_$_PROP_LIST_";
+    PointerListInfo classProps = "__OBJC_$_CLASS_PROP_LIST_";
+  };
+
+public:
+  ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
+  bool doMerge();
+
+private:
+  // This returns bool and always false for easy 'return false;' statements
+  bool registerError(const char *msg);
+
+  bool collectAndValidateCategoriesData();
+  bool
+  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+  bool eraseMergedCategories();
+
+  bool generateCatListForNonErasedCategories(
+      std::map<ConcatInputSection *, std::set<uint64_t>>
+          catListToErasedOffsets);
+  template <typename T>
+  bool collectSectionWriteInfoFromIsec(InputSection *isec,
+                                       InfroWriteSection<T> &catWriteInfo);
+  bool collectCategoryWriterInfoFromCategory(InfoInputCategory &catInfo);
+  bool parseCatInfoToExtInfo(InfoInputCategory &catInfo,
+                             ClassExtensionInfo &extInfo);
+
+  bool tryParseProtocolListInfo(ConcatInputSection *isec,
+                                uint32_t symbolsPerStruct,
+                                PointerListInfo &ptrList);
+
+  bool parsePointerListInfo(ConcatInputSection *isec, uint32_t secOffset,
+                            uint32_t symbolsPerStruct,
+                            PointerListInfo &ptrList);
+
+  bool emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
+                              ClassExtensionInfo &extInfo,
+                              PointerListInfo &ptrList);
+
+  bool emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+                               ClassExtensionInfo &extInfo,
+                               PointerListInfo &ptrList);
+
+  bool emitCategory(ClassExtensionInfo &extInfo, Defined *&catBodySym);
+  bool emitCatListEntrySec(std::string &forCateogryName,
+                           std::string &forBaseClassName, ObjFile *objFile,
+                           Defined *&catListSym);
+  bool emitCategoryBody(std::string &name, Defined *nameSym,
+                        Symbol *baseClassSym, std::string &baseClassName,
+                        ObjFile *objFile, Defined *&catBodySym);
+  bool emitCategoryName(std::string &name, ObjFile *objFile,
+                        Defined *&catNameSym);
+  bool createSymbolReference(Defined *refFrom, Symbol *refTo, uint32_t offset,
+                             Reloc &relocTemplate);
+  bool tryGetSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                Symbol *&sym);
+  bool tryGetDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                 Defined *&defined);
+  bool tryEraseDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                   bool stringOnly = false);
+
+  CategoryLayout catLayout;
+  ClassLayout classLayout;
+  ROClassLayout roClassLayout;
+  ListHeaderLayout listHeaderLayout;
+  MethodLayout methodLayout;
+  ProtocolListHeaderLayout protocolListHeaderLayout;
+
+  InfoCategoryWriter infoCategoryWriter;
+  std::vector<ConcatInputSection *> &allInputSections;
+  // Map of base class Symbol to list of InfoInputCategory's for it
+  std::map<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+
+  // Normally, the binary data comes from the input files, but since we're
+  // generating binary data ourselves, we use the below arrays to store it in.
+  // Need this to be 'static' so the data survives past the ObjcCategoryMerger
+  // object, as the data will be read by the Writer when the final binary is
+  // generated.
+  static SmallVector<SmallString<0>> generatedNames;
+  static SmallVector<SmallVector<uint8_t>> generatedSectionData;
+};
+
+SmallVector<SmallString<0>> ObjcCategoryMerger::generatedNames;
+SmallVector<SmallVector<uint8_t>> ObjcCategoryMerger::generatedSectionData;
+
+ObjcCategoryMerger::ObjcCategoryMerger(
+    std::vector<ConcatInputSection *> &_allInputSections)
+    : catLayout(target->wordSize), classLayout(target->wordSize),
+      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
+      methodLayout(target->wordSize),
+      protocolListHeaderLayout(target->wordSize),
+      allInputSections(_allInputSections) {}
+
+bool ObjcCategoryMerger::registerError(const char *msg) {
----------------
kyulee-com wrote:

This function is odd as it has `error` (no return/exit). Why you have `bool` return?
Do you actually `register` an error here? I'd name `reportError` something.

https://github.com/llvm/llvm-project/pull/82928


More information about the llvm-commits mailing list