[lld] [lld-macho][NFC] Track category merger input data source language for better verification (PR #95473)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 15:21:23 PDT 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/95473

>From 64fbb876b5a9ae9b79da6dbfafe76e33cddf94ef Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 13 Jun 2024 14:03:52 -0700
Subject: [PATCH 1/3] [lld-macho][NFC] Track category merger input data source
 language for better checking

---
 lld/MachO/ObjC.cpp | 74 +++++++++++++++++++++++++++++++---------------
 lld/MachO/ObjC.h   |  1 +
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..a277c18c7b29f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
 namespace {
 
 class ObjcCategoryMerger {
+  // In which language was a particular construct originally defined
+  enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
   // Information about an input category
   struct InfoInputCategory {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    SourceLanguage sourceLanguage = SourceUnknown;
 
     bool wasMerged = false;
   };
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
     std::string mergedContainerName;
     std::string baseClassName;
-    Symbol *baseClass = nullptr;
+    const Symbol *baseClass = nullptr;
+    SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
     CategoryLayout &catLayout;
 
     // In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
                              ClassExtensionInfo &extInfo);
 
   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
-                             PointerListInfo &ptrList);
+                             PointerListInfo &ptrList,
+                             SourceLanguage sourceLang);
 
   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
-                                        uint32_t secOffset);
+                                        uint32_t secOffset,
+                                        SourceLanguage sourceLang);
 
   void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
                             PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
 // Parse a protocol list that might be linked to ConcatInputSection at a given
 // offset. The format of the protocol list is different than other lists (prop
 // lists, method lists) so we need to parse it differently
-void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
-                                               uint32_t secOffset,
-                                               PointerListInfo &ptrList) {
+void ObjcCategoryMerger::parseProtocolListInfo(
+    const ConcatInputSection *isec, uint32_t secOffset,
+    PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
          "Tried to read pointer list beyond protocol section end");
 
@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
   [[maybe_unused]] uint32_t expectedListSizeSwift =
       expectedListSize - target->wordSize;
 
-  assert((expectedListSize == ptrListSym->isec()->data.size() ||
-          expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+  assert(((expectedListSize == ptrListSym->isec()->data.size() &&
+           sourceLang == SourceObjC) ||
+          (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
+           sourceLang == SourceSwift)) &&
          "Protocol list does not match expected size");
 
   uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
 // Parse a protocol list and return the PointerListInfo for it
 ObjcCategoryMerger::PointerListInfo
 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
-                                          uint32_t secOffset) {
+                                          uint32_t secOffset,
+                                          SourceLanguage sourceLang) {
   PointerListInfo ptrList;
-  parseProtocolListInfo(isec, secOffset, ptrList);
+  parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
   return ptrList;
 }
 
@@ -772,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
 
   // is this the first category we are parsing?
-  if (extInfo.mergedContainerName.empty())
+  if (extInfo.mergedContainerName.empty()) {
     extInfo.objFileForMergeData =
         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
-  else
+  } else
     extInfo.mergedContainerName += "|";
 
   assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
                        extInfo.classMethods);
 
   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, catInfo.sourceLanguage);
 
   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
                        extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
       if (nlCategories.count(categorySym))
         continue;
 
-      assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
-             categorySym->getName().starts_with(
-                 objc::symbol_names::swift_objc_category));
-
       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
       assert(catBodyIsec &&
              "Category data section is not an ConcatInputSection");
 
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      if (categorySym->getName().starts_with(objc::symbol_names::category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+      else if (categorySym->getName().starts_with(
+                   objc::symbol_names::swift_objc_category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+      else
+        llvm_unreachable("Unexpected category symbol name");
+
       // Check that the category has a reloc at 'klassOffset' (which is
       // a pointer to the class symbol)
 
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
 
   // Collect all the info from the categories
   ClassExtensionInfo extInfo(catLayout);
+  extInfo.baseClass = baseClass;
+
+  if (baseClass->getName().starts_with(objc::symbol_names::klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+  else if (baseClass->getName().starts_with(
+               objc::symbol_names::swift_objc_klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+  else
+    llvm_unreachable("Unexpected base class symbol name");
+
   for (auto &catInfo : categories) {
     parseCatInfoToExtInfo(catInfo, extInfo);
   }
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
   // Protocol lists are a special case - the same protocol list is in classRo
   // and metaRo, so we only need to parse it once
   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, extInfo.baseClassSourceLanguage);
 
   // Check that the classRo and metaRo protocol lists are identical
-  assert(
-      parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
-          parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
-      "Category merger expects classRo and metaRo to have the same protocol "
-      "list");
+  assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+                               extInfo.baseClassSourceLanguage) ==
+             parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+                                   extInfo.baseClassSourceLanguage) &&
+         "Category merger expects classRo and metaRo to have the same protocol "
+         "list");
 
   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
                        extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
 constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
 
 constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
 } // namespace symbol_names
 
 // Check for duplicate method names within related categories / classes.

>From 627127f72a98829746f9e785317c747458ca10f6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 14 Jun 2024 04:42:38 -0700
Subject: [PATCH 2/3] Address Feedback nr.1

---
 lld/MachO/ObjC.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index a277c18c7b29f..3e955c57092de 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -350,14 +350,14 @@ namespace {
 
 class ObjcCategoryMerger {
   // In which language was a particular construct originally defined
-  enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+  enum SourceLanguage { ObjC, Swift, Unknown };
 
   // Information about an input category
   struct InfoInputCategory {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
-    SourceLanguage sourceLanguage = SourceUnknown;
+    SourceLanguage sourceLanguage = SourceLanguage::Unknown;
 
     bool wasMerged = false;
   };
@@ -418,7 +418,7 @@ class ObjcCategoryMerger {
     std::string mergedContainerName;
     std::string baseClassName;
     const Symbol *baseClass = nullptr;
-    SourceLanguage baseClassSourceLanguage = SourceUnknown;
+    SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
 
     CategoryLayout &catLayout;
 
@@ -693,9 +693,9 @@ void ObjcCategoryMerger::parseProtocolListInfo(
       expectedListSize - target->wordSize;
 
   assert(((expectedListSize == ptrListSym->isec()->data.size() &&
-           sourceLang == SourceObjC) ||
+           sourceLang == SourceLanguage::ObjC) ||
           (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
-           sourceLang == SourceSwift)) &&
+           sourceLang == SourceLanguage::Swift)) &&
          "Protocol list does not match expected size");
 
   uint32_t off = protocolListHeaderLayout.totalSize;
@@ -783,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
 
   // is this the first category we are parsing?
-  if (extInfo.mergedContainerName.empty()) {
+  if (extInfo.mergedContainerName.empty())
     extInfo.objFileForMergeData =
         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
-  } else
+  else
     extInfo.mergedContainerName += "|";
 
   assert(extInfo.objFileForMergeData &&
@@ -1168,10 +1168,10 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
 
       InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
       if (categorySym->getName().starts_with(objc::symbol_names::category))
-        catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+        catInputInfo.sourceLanguage = SourceLanguage::ObjC;
       else if (categorySym->getName().starts_with(
                    objc::symbol_names::swift_objc_category))
-        catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+        catInputInfo.sourceLanguage = SourceLanguage::Swift;
       else
         llvm_unreachable("Unexpected category symbol name");
 
@@ -1384,10 +1384,10 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
   extInfo.baseClass = baseClass;
 
   if (baseClass->getName().starts_with(objc::symbol_names::klass))
-    extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+    extInfo.baseClassSourceLanguage = SourceLanguage::ObjC;
   else if (baseClass->getName().starts_with(
                objc::symbol_names::swift_objc_klass))
-    extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+    extInfo.baseClassSourceLanguage = SourceLanguage::Swift;
   else
     llvm_unreachable("Unexpected base class symbol name");
 

>From 62d9f4c511cb22ef472d03a8dcecce5b0a5bc90f Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Mon, 17 Jun 2024 15:21:09 -0700
Subject: [PATCH 3/3] Address feedback nr.2

---
 lld/MachO/ObjC.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 3e955c57092de..413fa0bb64390 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -350,7 +350,7 @@ namespace {
 
 class ObjcCategoryMerger {
   // In which language was a particular construct originally defined
-  enum SourceLanguage { ObjC, Swift, Unknown };
+  enum SourceLanguage { Unknown, ObjC, Swift };
 
   // Information about an input category
   struct InfoInputCategory {
@@ -1166,15 +1166,17 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
       assert(catBodyIsec &&
              "Category data section is not an ConcatInputSection");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      SourceLanguage eLang = SourceLanguage::Unknown;
       if (categorySym->getName().starts_with(objc::symbol_names::category))
-        catInputInfo.sourceLanguage = SourceLanguage::ObjC;
+        eLang = SourceLanguage::ObjC;
       else if (categorySym->getName().starts_with(
                    objc::symbol_names::swift_objc_category))
-        catInputInfo.sourceLanguage = SourceLanguage::Swift;
+        eLang = SourceLanguage::Swift;
       else
         llvm_unreachable("Unexpected category symbol name");
 
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
+
       // Check that the category has a reloc at 'klassOffset' (which is
       // a pointer to the class symbol)
 



More information about the llvm-commits mailing list