[lld] [lld-macho] Category Merger: add support for addrsig references (PR #90903)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 2 15:36:37 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-macho

Author: None (alx32)

<details>
<summary>Changes</summary>

When generating categories, clang will sometimes generate references in the `.addrsig` section to the various category data items. Since we may erase such items after merging them, we also need to remove the references to them from the `.addrsig` section; otherwise, this will cause runtime asserts when the `.addrsig` section tries to access the now-invalid data.

Implementation-wise, we use a hash set to keep track of all erased `InputSection`s and then go through all `.addrsig` sections and remove references to any erased `InputSection`.

---
Full diff: https://github.com/llvm/llvm-project/pull/90903.diff


2 Files Affected:

- (modified) lld/MachO/ObjC.cpp (+60-10) 
- (modified) lld/test/MachO/objc-category-merging-extern-class-minimal.s (+3) 


``````````diff
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 95fe0c9374f150..9720483d13c24a 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -21,6 +21,8 @@
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Support/TimeProfiler.h"
 
+#include <unordered_set>
+
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace lld;
@@ -431,6 +433,7 @@ class ObjcCategoryMerger {
   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
 
   void eraseISec(ConcatInputSection *isec);
+  void removeRefsToErasedIsecs(std::unordered_set<InputSection *> erasedIsecs);
   void eraseMergedCategories();
 
   void generateCatListForNonErasedCategories(
@@ -472,7 +475,9 @@ class ObjcCategoryMerger {
   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
                                      uint32_t offset);
   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
-                                   uint32_t offset);
+                                   uint32_t offset,
+                                   std::unordered_set<InputSection *> &erased);
+  void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
 
   // Allocate a null-terminated StringRef backed by generatedSectionData
   StringRef newStringData(const char *str);
@@ -533,6 +538,8 @@ void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
 Symbol *
 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
                                              uint32_t offset) {
+  if (!isec)
+    return nullptr;
   const Reloc *reloc = isec->getRelocAt(offset);
 
   if (!reloc)
@@ -551,7 +558,8 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
-    const ConcatInputSection *isec, uint32_t offset) {
+    const ConcatInputSection *isec, uint32_t offset,
+    std::unordered_set<InputSection *> &erased) {
   const Reloc *reloc = isec->getRelocAt(offset);
 
   if (!reloc)
@@ -561,9 +569,11 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
   if (!sym)
     return;
 
-  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
+  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) {
     eraseISec(cisec);
-  else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
+    erased.insert(cisec);
+  } else if (auto *csisec =
+                 dyn_cast_or_null<CStringInputSection>(sym->isec())) {
     uint32_t totalOffset = sym->value + reloc->addend;
     StringPiece &piece = csisec->getStringPiece(totalOffset);
     piece.live = false;
@@ -1183,26 +1193,66 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // the references to the ones we merged.
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
+  // We use erasedIsecs below to track erased sections so we can later remove
+  // references to it.
+  std::unordered_set<InputSection *> erasedIsecs;
+  erasedIsecs.reserve(categoryMap.size());
+
   // Erase the old method lists & names of the categories that were merged
   for (auto &mapEntry : categoryMap) {
     for (InfoInputCategory &catInfo : mapEntry.second) {
       if (!catInfo.wasMerged)
         continue;
 
+      erasedIsecs.insert(catInfo.catBodyIsec);
+      erasedIsecs.insert(catInfo.catListIsec);
+
       eraseISec(catInfo.catBodyIsec);
-      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
+      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset,
+                                  erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instanceMethodsOffset);
+                                  catLayout.instanceMethodsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classMethodsOffset);
+                                  catLayout.classMethodsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.protocolsOffset);
+                                  catLayout.protocolsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classPropsOffset);
+                                  catLayout.classPropsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instancePropsOffset);
+                                  catLayout.instancePropsOffset, erasedIsecs);
     }
   }
+
+  removeRefsToErasedIsecs(erasedIsecs);
+}
+
+// The compiler may generate references to categories inside the addrsig
+// section. This function will erase these references.
+void ObjcCategoryMerger::removeRefsToErasedIsecs(
+    std::unordered_set<InputSection *> erasedIsecs) {
+  for (InputSection *isec : inputSections) {
+    if (isec->getName() != section_names::addrSig)
+      continue;
+
+    auto removeRelocs = [&erasedIsecs](Reloc &r) {
+      ConcatInputSection *isec = nullptr;
+      isec = dyn_cast_or_null<ConcatInputSection>(
+          r.referent.dyn_cast<InputSection *>());
+      if (!isec) {
+        Defined *sym =
+            dyn_cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>());
+        if (sym)
+          isec = dyn_cast<ConcatInputSection>(sym->isec());
+      }
+      if (!isec)
+        return false;
+      return erasedIsecs.count(isec) > 0;
+    };
+
+    isec->relocs.erase(
+        std::remove_if(isec->relocs.begin(), isec->relocs.end(), removeRelocs),
+        isec->relocs.end());
+  }
 }
 
 void ObjcCategoryMerger::doMerge() {
diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-extern-class-minimal.s
index ede7ef5d9c32d4..3ca881c2da156e 100644
--- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s
+++ b/lld/test/MachO/objc-category-merging-extern-class-minimal.s
@@ -153,3 +153,6 @@ L_OBJC_IMAGE_INFO:
 	.long	0
 	.long	96
 .subsections_via_symbols
+
+.addrsig
+.addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01

``````````

</details>


https://github.com/llvm/llvm-project/pull/90903


More information about the llvm-commits mailing list