[lld] [lld-macho] Category Merger: add support for addrsig references (PR #90903)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 5 15:12:50 PDT 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/90903

>From 031c3ec947b711fa12dc5df7ee862d03f24c8405 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 2 May 2024 13:10:12 -0700
Subject: [PATCH 1/5] [lld-macho] Category Merger: add support for addrsig
 references

---
 lld/MachO/ObjC.cpp                            | 70 ++++++++++++++++---
 ...jc-category-merging-extern-class-minimal.s |  3 +
 2 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 4760fffebe3b30..15bd87d8260f84 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -21,6 +21,8 @@
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Support/TimeProfiler.h"
 
+#include <unordered_set>
+
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace lld;
@@ -420,6 +422,7 @@ class ObjcCategoryMerger {
   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
 
   void eraseISec(ConcatInputSection *isec);
+  void removeRefsToErasedIsecs(std::unordered_set<InputSection *> erasedIsecs);
   void eraseMergedCategories();
 
   void generateCatListForNonErasedCategories(
@@ -460,7 +463,9 @@ class ObjcCategoryMerger {
   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
                                      uint32_t offset);
   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
-                                   uint32_t offset);
+                                   uint32_t offset,
+                                   std::unordered_set<InputSection *> &erased);
+  void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
 
   // Allocate a null-terminated StringRef backed by generatedSectionData
   StringRef newStringData(const char *str);
@@ -518,6 +523,8 @@ void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
 Symbol *
 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
                                              uint32_t offset) {
+  if (!isec)
+    return nullptr;
   const Reloc *reloc = isec->getRelocAt(offset);
 
   if (!reloc)
@@ -536,7 +543,8 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
-    const ConcatInputSection *isec, uint32_t offset) {
+    const ConcatInputSection *isec, uint32_t offset,
+    std::unordered_set<InputSection *> &erased) {
   const Reloc *reloc = isec->getRelocAt(offset);
 
   if (!reloc)
@@ -546,9 +554,11 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
   if (!sym)
     return;
 
-  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
+  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) {
     eraseISec(cisec);
-  else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
+    erased.insert(cisec);
+  } else if (auto *csisec =
+                 dyn_cast_or_null<CStringInputSection>(sym->isec())) {
     uint32_t totalOffset = sym->value + reloc->addend;
     StringPiece &piece = csisec->getStringPiece(totalOffset);
     piece.live = false;
@@ -1168,26 +1178,66 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // the references to the ones we merged.
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
+  // We use erasedIsecs below to track erased sections so we can later remove
+  // references to it.
+  std::unordered_set<InputSection *> erasedIsecs;
+  erasedIsecs.reserve(categoryMap.size());
+
   // Erase the old method lists & names of the categories that were merged
   for (auto &mapEntry : categoryMap) {
     for (InfoInputCategory &catInfo : mapEntry.second) {
       if (!catInfo.wasMerged)
         continue;
 
+      erasedIsecs.insert(catInfo.catBodyIsec);
+      erasedIsecs.insert(catInfo.catListIsec);
+
       eraseISec(catInfo.catBodyIsec);
-      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
+      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset,
+                                  erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instanceMethodsOffset);
+                                  catLayout.instanceMethodsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classMethodsOffset);
+                                  catLayout.classMethodsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.protocolsOffset);
+                                  catLayout.protocolsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classPropsOffset);
+                                  catLayout.classPropsOffset, erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instancePropsOffset);
+                                  catLayout.instancePropsOffset, erasedIsecs);
     }
   }
+
+  removeRefsToErasedIsecs(erasedIsecs);
+}
+
+// The compiler may generate references to categories inside the addrsig
+// section. This function will erase these references.
+void ObjcCategoryMerger::removeRefsToErasedIsecs(
+    std::unordered_set<InputSection *> erasedIsecs) {
+  for (InputSection *isec : inputSections) {
+    if (isec->getName() != section_names::addrSig)
+      continue;
+
+    auto removeRelocs = [&erasedIsecs](Reloc &r) {
+      ConcatInputSection *isec = nullptr;
+      isec = dyn_cast_or_null<ConcatInputSection>(
+          r.referent.dyn_cast<InputSection *>());
+      if (!isec) {
+        Defined *sym =
+            dyn_cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>());
+        if (sym)
+          isec = dyn_cast<ConcatInputSection>(sym->isec());
+      }
+      if (!isec)
+        return false;
+      return erasedIsecs.count(isec) > 0;
+    };
+
+    isec->relocs.erase(
+        std::remove_if(isec->relocs.begin(), isec->relocs.end(), removeRelocs),
+        isec->relocs.end());
+  }
 }
 
 void ObjcCategoryMerger::doMerge() {
diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-extern-class-minimal.s
index ea79f29a421c5c..796993799f2db0 100644
--- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s
+++ b/lld/test/MachO/objc-category-merging-extern-class-minimal.s
@@ -153,3 +153,6 @@ L_OBJC_IMAGE_INFO:
 	.long	0
 	.long	96
 .subsections_via_symbols
+
+.addrsig
+.addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01

>From f46b5ae22561c25383f0b4036bb12fd12d0bb429 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 2 May 2024 17:41:07 -0700
Subject: [PATCH 2/5] Address feedback nr.1

---
 lld/MachO/ObjC.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 15bd87d8260f84..20abc730525f63 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -422,7 +422,7 @@ class ObjcCategoryMerger {
   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
 
   void eraseISec(ConcatInputSection *isec);
-  void removeRefsToErasedIsecs(std::unordered_set<InputSection *> erasedIsecs);
+  void removeRefsToErasedIsecs(std::unordered_set<InputSection *> &erasedIsecs);
   void eraseMergedCategories();
 
   void generateCatListForNonErasedCategories(
@@ -1189,10 +1189,13 @@ void ObjcCategoryMerger::eraseMergedCategories() {
       if (!catInfo.wasMerged)
         continue;
 
+      eraseISec(catInfo.catBodyIsec);
+      // Mark the category body as having been erased
       erasedIsecs.insert(catInfo.catBodyIsec);
+
+      // Also mark the catListIsec as having been erased, it has already been erased above
       erasedIsecs.insert(catInfo.catListIsec);
 
-      eraseISec(catInfo.catBodyIsec);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset,
                                   erasedIsecs);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
@@ -1214,14 +1217,13 @@ void ObjcCategoryMerger::eraseMergedCategories() {
 // The compiler may generate references to categories inside the addrsig
 // section. This function will erase these references.
 void ObjcCategoryMerger::removeRefsToErasedIsecs(
-    std::unordered_set<InputSection *> erasedIsecs) {
+    std::unordered_set<InputSection *> &erasedIsecs) {
   for (InputSection *isec : inputSections) {
     if (isec->getName() != section_names::addrSig)
       continue;
 
     auto removeRelocs = [&erasedIsecs](Reloc &r) {
-      ConcatInputSection *isec = nullptr;
-      isec = dyn_cast_or_null<ConcatInputSection>(
+      auto *isec = dyn_cast_or_null<ConcatInputSection>(
           r.referent.dyn_cast<InputSection *>());
       if (!isec) {
         Defined *sym =

>From 650113fdc315b2ce5164666de3eedb557c049918 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 3 May 2024 11:46:07 -0700
Subject: [PATCH 3/5] Address feedback nr.2

---
 lld/MachO/ObjC.cpp | 46 ++++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 20abc730525f63..7dc39dbf0a9ca4 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -422,7 +422,7 @@ class ObjcCategoryMerger {
   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
 
   void eraseISec(ConcatInputSection *isec);
-  void removeRefsToErasedIsecs(std::unordered_set<InputSection *> &erasedIsecs);
+  void removeRefsToErasedIsecs();
   void eraseMergedCategories();
 
   void generateCatListForNonErasedCategories(
@@ -463,8 +463,7 @@ class ObjcCategoryMerger {
   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
                                      uint32_t offset);
   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
-                                   uint32_t offset,
-                                   std::unordered_set<InputSection *> &erased);
+                                   uint32_t offset);
   void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
 
   // Allocate a null-terminated StringRef backed by generatedSectionData
@@ -483,6 +482,8 @@ class ObjcCategoryMerger {
   std::vector<ConcatInputSection *> &allInputSections;
   // Map of base class Symbol to list of InfoInputCategory's for it
   DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+  // Set for tracking InputSection erased via eraseISec
+  std::unordered_set<InputSection *> erasedIsecs;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -543,8 +544,7 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
-    const ConcatInputSection *isec, uint32_t offset,
-    std::unordered_set<InputSection *> &erased) {
+    const ConcatInputSection *isec, uint32_t offset) {
   const Reloc *reloc = isec->getRelocAt(offset);
 
   if (!reloc)
@@ -556,7 +556,6 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
 
   if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) {
     eraseISec(cisec);
-    erased.insert(cisec);
   } else if (auto *csisec =
                  dyn_cast_or_null<CStringInputSection>(sym->isec())) {
     uint32_t totalOffset = sym->value + reloc->addend;
@@ -1151,6 +1150,8 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
 }
 
 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
+  erasedIsecs.insert(isec);
+
   isec->live = false;
   for (auto &sym : isec->symbols)
     sym->used = false;
@@ -1178,11 +1179,6 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // the references to the ones we merged.
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
-  // We use erasedIsecs below to track erased sections so we can later remove
-  // references to it.
-  std::unordered_set<InputSection *> erasedIsecs;
-  erasedIsecs.reserve(categoryMap.size());
-
   // Erase the old method lists & names of the categories that were merged
   for (auto &mapEntry : categoryMap) {
     for (InfoInputCategory &catInfo : mapEntry.second) {
@@ -1190,39 +1186,33 @@ void ObjcCategoryMerger::eraseMergedCategories() {
         continue;
 
       eraseISec(catInfo.catBodyIsec);
-      // Mark the category body as having been erased
-      erasedIsecs.insert(catInfo.catBodyIsec);
-
-      // Also mark the catListIsec as having been erased, it has already been erased above
-      erasedIsecs.insert(catInfo.catListIsec);
 
-      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset,
-                                  erasedIsecs);
+      tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instanceMethodsOffset, erasedIsecs);
+                                  catLayout.instanceMethodsOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classMethodsOffset, erasedIsecs);
+                                  catLayout.classMethodsOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.protocolsOffset, erasedIsecs);
+                                  catLayout.protocolsOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.classPropsOffset, erasedIsecs);
+                                  catLayout.classPropsOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
-                                  catLayout.instancePropsOffset, erasedIsecs);
+                                  catLayout.instancePropsOffset);
     }
   }
 
-  removeRefsToErasedIsecs(erasedIsecs);
+  removeRefsToErasedIsecs();
 }
 
 // The compiler may generate references to categories inside the addrsig
 // section. This function will erase these references.
-void ObjcCategoryMerger::removeRefsToErasedIsecs(
-    std::unordered_set<InputSection *> &erasedIsecs) {
+void ObjcCategoryMerger::removeRefsToErasedIsecs() {
   for (InputSection *isec : inputSections) {
     if (isec->getName() != section_names::addrSig)
       continue;
 
-    auto removeRelocs = [&erasedIsecs](Reloc &r) {
+    auto &_erasedIsecs = erasedIsecs;
+    auto removeRelocs = [&_erasedIsecs](Reloc &r) {
       auto *isec = dyn_cast_or_null<ConcatInputSection>(
           r.referent.dyn_cast<InputSection *>());
       if (!isec) {
@@ -1233,7 +1223,7 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs(
       }
       if (!isec)
         return false;
-      return erasedIsecs.count(isec) > 0;
+      return _erasedIsecs.count(isec) > 0;
     };
 
     isec->relocs.erase(

>From cee422b1db4a569ac3e89bf203d6088e42d9b7ac Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 3 May 2024 15:15:32 -0700
Subject: [PATCH 4/5] Address feedback nr.3

---
 lld/MachO/ObjC.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 7dc39dbf0a9ca4..18d5344436b0d7 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1211,8 +1211,7 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
     if (isec->getName() != section_names::addrSig)
       continue;
 
-    auto &_erasedIsecs = erasedIsecs;
-    auto removeRelocs = [&_erasedIsecs](Reloc &r) {
+    auto removeRelocs = [this](Reloc &r) {
       auto *isec = dyn_cast_or_null<ConcatInputSection>(
           r.referent.dyn_cast<InputSection *>());
       if (!isec) {
@@ -1223,7 +1222,7 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
       }
       if (!isec)
         return false;
-      return _erasedIsecs.count(isec) > 0;
+      return erasedIsecs.count(isec) > 0;
     };
 
     isec->relocs.erase(

>From 759172fb58b780986b28616abb32ddd4e01b5200 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Sun, 5 May 2024 15:12:26 -0700
Subject: [PATCH 5/5] Address Feedback Nr.4

---
 lld/MachO/ObjC.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 18d5344436b0d7..53296577e38769 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -21,8 +21,6 @@
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Support/TimeProfiler.h"
 
-#include <unordered_set>
-
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace lld;
@@ -483,7 +481,7 @@ class ObjcCategoryMerger {
   // Map of base class Symbol to list of InfoInputCategory's for it
   DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
   // Set for tracking InputSection erased via eraseISec
-  std::unordered_set<InputSection *> erasedIsecs;
+  DenseSet<InputSection *> erasedIsecs;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -1225,9 +1223,7 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
       return erasedIsecs.count(isec) > 0;
     };
 
-    isec->relocs.erase(
-        std::remove_if(isec->relocs.begin(), isec->relocs.end(), removeRelocs),
-        isec->relocs.end());
+    llvm::erase_if(isec->relocs, removeRelocs);
   }
 }
 



More information about the llvm-commits mailing list