[lld] [lld-macho][ObjC] Implement category merging into base class (PR #92448)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 22:47:10 PDT 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/92448

>From 351da524870b15e2dcafee82c0c2392a014c100b Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Wed, 15 May 2024 19:54:25 -0700
Subject: [PATCH 1/2] [lld-macho][ObjC] Implement category merging into base
 class

---
 lld/MachO/ObjC.cpp                            | 159 ++++++++++++++++--
 ...imal.s => objc-category-merging-minimal.s} | 125 +++++++++++++-
 2 files changed, 271 insertions(+), 13 deletions(-)
 rename lld/test/MachO/{objc-category-merging-extern-class-minimal.s => objc-category-merging-minimal.s} (59%)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 9d1612beae872..c0b9ab669ac37 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -379,8 +379,8 @@ class ObjcCategoryMerger {
     InfoWriteSection catPtrListInfo;
   };
 
-  // Information about a pointer list in the original categories (method lists,
-  // protocol lists, etc)
+  // Information about a pointer list in the original categories or class(method
+  // lists, protocol lists, etc)
   struct PointerListInfo {
     PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
         : categoryPrefix(_categoryPrefix),
@@ -395,9 +395,9 @@ class ObjcCategoryMerger {
     std::vector<Symbol *> allPtrs;
   };
 
-  // Full information about all the categories that extend a class. This will
-  // include all the additional methods, protocols, and properties that are
-  // contained in all the categories that extend a particular class.
+  // Full information describing an ObjC class . This will include all the
+  // additional methods, protocols, and properties that are contained in the
+  // class and all the categories that extend a particular class.
   struct ClassExtensionInfo {
     ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
 
@@ -456,9 +456,9 @@ class ObjcCategoryMerger {
                               const ClassExtensionInfo &extInfo,
                               const PointerListInfo &ptrList);
 
-  void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
-                               const ClassExtensionInfo &extInfo,
-                               const PointerListInfo &ptrList);
+  Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+                                   const ClassExtensionInfo &extInfo,
+                                   const PointerListInfo &ptrList);
 
   Defined *emitCategory(const ClassExtensionInfo &extInfo);
   Defined *emitCatListEntrySec(const std::string &forCategoryName,
@@ -474,6 +474,10 @@ class ObjcCategoryMerger {
                                    uint32_t offset);
   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
                                      uint32_t offset);
+  Defined *getClassRo(const Defined *classSym, bool getMetaRo);
+  void mergeCategoriesIntoBaseClass(const Defined *baseClass,
+                                    std::vector<InfoInputCategory> &categories);
+  void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
                                    uint32_t offset);
 
@@ -552,6 +556,32 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
   return dyn_cast_or_null<Defined>(sym);
 }
 
+// Get the class's ro_data symbol. If getMetaRo is true, then we will return
+// the meta-class's ro_data symbol. Otherwise, we will return the class
+// (instance) ro_data symbol.
+Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
+                                        bool getMetaRo) {
+  ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
+  if (!isec)
+    return nullptr;
+
+  Defined *classRo = nullptr;
+  if (getMetaRo) {
+    Defined *metaClass = tryGetDefinedAtIsecOffset(
+        isec, classLayout.metaClassOffset + classSym->value);
+
+    classRo = metaClass ? tryGetDefinedAtIsecOffset(
+                              dyn_cast<ConcatInputSection>(metaClass->isec()),
+                              classLayout.roDataOffset)
+                        : nullptr;
+  } else {
+    classRo = tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
+                                                  classSym->value);
+  }
+
+  return classRo;
+}
+
 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
@@ -769,11 +799,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
 
 // Generate a protocol list (including header) and link it into the parent at
 // the specified offset.
-void ObjcCategoryMerger::emitAndLinkProtocolList(
+Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
     Defined *parentSym, uint32_t linkAtOffset,
     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
   if (ptrList.allPtrs.empty())
-    return;
+    return nullptr;
 
   assert(ptrList.allPtrs.size() == ptrList.structCount);
 
@@ -820,6 +850,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
                           infoCategoryWriter.catPtrListInfo.relocTemplate);
     offset += target->wordSize;
   }
+
+  return ptrListSym;
 }
 
 // Generate a pointer list (including header) and link it into the parent at the
@@ -1265,10 +1297,16 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &entry : categoryMap)
-    if (entry.second.size() > 1)
+  for (auto &entry : categoryMap) {
+    if (isa<Defined>(entry.first)) {
+      // Merge all categories into the base class
+      auto *baseClass = cast<Defined>(entry.first);
+      mergeCategoriesIntoBaseClass(baseClass, entry.second);
+    } else if (entry.second.size() > 1) {
       // Merge all categories into a new, single category
       mergeCategoriesIntoSingleCategory(entry.second);
+    }
+  }
 
   // Erase all categories that were merged
   eraseMergedCategories();
@@ -1302,3 +1340,100 @@ void objc::mergeCategories() {
 }
 
 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
+
+void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
+    const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
+  assert(categories.size() >= 1 && "Expected at least one category to merge");
+
+  // Collect all the info from the categories
+  ClassExtensionInfo extInfo(catLayout);
+  for (auto &catInfo : categories) {
+    parseCatInfoToExtInfo(catInfo, extInfo);
+  }
+
+  // Get metadata for the base class
+  Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
+  ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
+  Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
+  ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
+
+  // Now collect the info from the base class from the various lists in the
+  // class metadata
+  parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+                        extInfo.protocols);
+
+  parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
+                       extInfo.classMethods);
+
+  parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
+                       extInfo.classProps);
+
+  parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
+                       extInfo.instanceMethods);
+
+  parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
+                       extInfo.instanceProps);
+
+  // Erase the old lists - these will be generated and replaced
+  eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
+  eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
+  eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
+  eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
+  eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
+  eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
+
+  // Emit the newly merged lists - first into the meta RO then into the class RO
+  emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
+                         extInfo.classMethods);
+
+  // Protocols are a special case - the single list is referenced by both the
+  // class RO and meta RO. Here we emit it and link it into the meta RO
+  Defined *protoListSym = emitAndLinkProtocolList(
+      metaRo, roClassLayout.baseProtocolsOffset, extInfo, extInfo.protocols);
+
+  emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
+                         extInfo.classProps);
+
+  emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
+                         extInfo.instanceMethods);
+
+  // If we emitted a new protocol list, link it to the class RO also
+  if (protoListSym) {
+    createSymbolReference(classRo, protoListSym,
+                          roClassLayout.baseProtocolsOffset,
+                          infoCategoryWriter.catBodyInfo.relocTemplate);
+  }
+
+  emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
+                         extInfo.instanceProps);
+
+  // Mark all the categories as merged - this will be used to erase them later
+  for (auto &catInfo : categories)
+    catInfo.wasMerged = true;
+}
+
+// Erase the symbol at a given offset in an InputSection
+void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
+                                                 uint32_t offset) {
+  Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
+  if (!sym)
+    return;
+
+  // Remove the symbol from isec->symbols
+  assert(isa<Defined>(sym) && "Can only erase a Defined");
+  isec->symbols.erase(
+      std::remove(isec->symbols.begin(), isec->symbols.end(), sym),
+      isec->symbols.end());
+
+  // Remove the relocs that refer to this symbol
+  auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
+  isec->relocs.erase(
+      std::remove_if(isec->relocs.begin(), isec->relocs.end(), removeAtOff),
+      isec->relocs.end());
+
+  // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
+  // the whole ConcatInputSection
+  if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
+    if (cisec->data.size() == sym->size)
+      eraseISec(cisec);
+}
diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s
similarity index 59%
rename from lld/test/MachO/objc-category-merging-extern-class-minimal.s
rename to lld/test/MachO/objc-category-merging-minimal.s
index 5dd8924df5ad6..fcd90f178b150 100644
--- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s
+++ b/lld/test/MachO/objc-category-merging-minimal.s
@@ -1,7 +1,8 @@
 # REQUIRES: aarch64
 # RUN: rm -rf %t; split-file %s %t && cd %t
 
-## Create a dylib with a fake base class to link against
+############ Test merging multiple categories into a single category ############
+## Create a dylib with a fake base class to link against in when merging between categories
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s
 # RUN: %lld -arch arm64 a64_fakedylib.o -o a64_fakedylib.dylib -dylib
 
@@ -14,6 +15,15 @@
 # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS
 # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS
 
+############ Test merging multiple categories into the base class ############
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s
+# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
+# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o
+
+# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib  | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE
+# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE
+
+
 #### Check merge categories enabled ###
 # Check that the original categories are not there
 MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
@@ -44,6 +54,28 @@ NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
 NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 
 
+#### Check merge cateogires into base class is disabled ####
+NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
+NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
+
+#### Check merge cateogires into base class is enabled and categories are merged into base class ####
+YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
+YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
+
+YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass
+YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass
+YES_MERGE_INTO_BASE: baseMethods
+YES_MERGE_INTO_BASE-NEXT: entsize 24
+YES_MERGE_INTO_BASE-NEXT: count 3
+YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod
+YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16 at 0:8
+YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod
+YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16 at 0:8
+YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod
+YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16 at 0:8
+YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod]
 
 #--- a64_fakedylib.s
 
@@ -156,3 +188,94 @@ L_OBJC_IMAGE_INFO:
 
 .addrsig
 .addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01
+
+#--- merge_base_class_minimal.s
+; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s
+;  ================== Generated from ObjC: ==================
+; __attribute__((objc_root_class))
+; @interface MyBaseClass
+; - (void)baseInstanceMethod;
+; @end
+;
+; @implementation MyBaseClass
+; - (void)baseInstanceMethod {}
+; @end
+;  ================== Generated from ObjC  ==================
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 11, 0
+	.p2align	2
+"-[MyBaseClass baseInstanceMethod]":
+	.cfi_startproc
+; %bb.0:
+	ret
+	.cfi_endproc
+	.section	__DATA,__objc_data
+	.globl	_OBJC_CLASS_$_MyBaseClass
+	.p2align	3, 0x0
+_OBJC_CLASS_$_MyBaseClass:
+	.quad	_OBJC_METACLASS_$_MyBaseClass
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	__OBJC_CLASS_RO_$_MyBaseClass
+	.globl	_OBJC_METACLASS_$_MyBaseClass
+	.p2align	3, 0x0
+_OBJC_METACLASS_$_MyBaseClass:
+	.quad	_OBJC_METACLASS_$_MyBaseClass
+	.quad	_OBJC_CLASS_$_MyBaseClass
+	.quad	0
+	.quad	0
+	.quad	__OBJC_METACLASS_RO_$_MyBaseClass
+	.section	__TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_:
+	.asciz	"MyBaseClass"
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0
+__OBJC_METACLASS_RO_$_MyBaseClass:
+	.long	3
+	.long	40
+	.long	40
+	.space	4
+	.quad	0
+	.quad	l_OBJC_CLASS_NAME_
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.section	__TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_:
+	.asciz	"baseInstanceMethod"
+	.section	__TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_:
+	.asciz	"v16 at 0:8"
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0
+__OBJC_$_INSTANCE_METHODS_MyBaseClass:
+	.long	24
+	.long	1
+	.quad	l_OBJC_METH_VAR_NAME_
+	.quad	l_OBJC_METH_VAR_TYPE_
+	.quad	"-[MyBaseClass baseInstanceMethod]"
+	.p2align	3, 0x0
+__OBJC_CLASS_RO_$_MyBaseClass:
+	.long	2
+	.long	0
+	.long	0
+	.space	4
+	.quad	0
+	.quad	l_OBJC_CLASS_NAME_
+	.quad	__OBJC_$_INSTANCE_METHODS_MyBaseClass
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.section	__DATA,__objc_classlist,regular,no_dead_strip
+	.p2align	3, 0x0
+l_OBJC_LABEL_CLASS_$:
+	.quad	_OBJC_CLASS_$_MyBaseClass
+	.section	__DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+	.long	0
+	.long	64
+.subsections_via_symbols

>From 2d60e4edfae4adc91d445e6646f2d593a0a18de9 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 16 May 2024 22:46:42 -0700
Subject: [PATCH 2/2] Address Feedback nr.1

---
 lld/MachO/ObjC.cpp | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index c0b9ab669ac37..d0c7ea823e12f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -565,21 +565,18 @@ Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
   if (!isec)
     return nullptr;
 
-  Defined *classRo = nullptr;
-  if (getMetaRo) {
-    Defined *metaClass = tryGetDefinedAtIsecOffset(
-        isec, classLayout.metaClassOffset + classSym->value);
-
-    classRo = metaClass ? tryGetDefinedAtIsecOffset(
-                              dyn_cast<ConcatInputSection>(metaClass->isec()),
-                              classLayout.roDataOffset)
-                        : nullptr;
-  } else {
-    classRo = tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
-                                                  classSym->value);
-  }
+  if (!getMetaRo)
+    return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
+                                               classSym->value);
+
+  Defined *metaClass = tryGetDefinedAtIsecOffset(
+      isec, classLayout.metaClassOffset + classSym->value);
+  if (!metaClass)
+    return nullptr;
 
-  return classRo;
+  return tryGetDefinedAtIsecOffset(
+      dyn_cast<ConcatInputSection>(metaClass->isec()),
+      classLayout.roDataOffset);
 }
 
 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
@@ -1297,14 +1294,13 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &entry : categoryMap) {
-    if (isa<Defined>(entry.first)) {
+  for (auto &[baseClass, catInfos] : categoryMap) {
+    if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
       // Merge all categories into the base class
-      auto *baseClass = cast<Defined>(entry.first);
-      mergeCategoriesIntoBaseClass(baseClass, entry.second);
-    } else if (entry.second.size() > 1) {
+      mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
+    } else if (catInfos.size() > 1) {
       // Merge all categories into a new, single category
-      mergeCategoriesIntoSingleCategory(entry.second);
+      mergeCategoriesIntoSingleCategory(catInfos);
     }
   }
 
@@ -1421,15 +1417,11 @@ void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
 
   // Remove the symbol from isec->symbols
   assert(isa<Defined>(sym) && "Can only erase a Defined");
-  isec->symbols.erase(
-      std::remove(isec->symbols.begin(), isec->symbols.end(), sym),
-      isec->symbols.end());
+  llvm::erase(isec->symbols, sym);
 
   // Remove the relocs that refer to this symbol
   auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
-  isec->relocs.erase(
-      std::remove_if(isec->relocs.begin(), isec->relocs.end(), removeAtOff),
-      isec->relocs.end());
+  llvm::erase_if(isec->relocs, removeAtOff);
 
   // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
   // the whole ConcatInputSection



More information about the llvm-commits mailing list