[lld] [lld-macho][NFC] Track category merger input data source language for better verification (PR #95473)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 14:19:18 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: None (alx32)

<details>
<summary>Changes</summary>

This change adds tracking for the source language of the various input structs used by the category merger. Identification is based on expected symbol names. It also adds checks to ensure we're dealing with the expected data in known scenarios. 

---
Full diff: https://github.com/llvm/llvm-project/pull/95473.diff


2 Files Affected:

- (modified) lld/MachO/ObjC.cpp (+50-24) 
- (modified) lld/MachO/ObjC.h (+1) 


``````````diff
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..a277c18c7b29f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
 namespace {
 
 class ObjcCategoryMerger {
+  // In which language was a particular construct originally defined
+  enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
   // Information about an input category
   struct InfoInputCategory {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    SourceLanguage sourceLanguage = SourceUnknown;
 
     bool wasMerged = false;
   };
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
     std::string mergedContainerName;
     std::string baseClassName;
-    Symbol *baseClass = nullptr;
+    const Symbol *baseClass = nullptr;
+    SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
     CategoryLayout &catLayout;
 
     // In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
                              ClassExtensionInfo &extInfo);
 
   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
-                             PointerListInfo &ptrList);
+                             PointerListInfo &ptrList,
+                             SourceLanguage sourceLang);
 
   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
-                                        uint32_t secOffset);
+                                        uint32_t secOffset,
+                                        SourceLanguage sourceLang);
 
   void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
                             PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
 // Parse a protocol list that might be linked to ConcatInputSection at a given
 // offset. The format of the protocol list is different than other lists (prop
 // lists, method lists) so we need to parse it differently
-void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
-                                               uint32_t secOffset,
-                                               PointerListInfo &ptrList) {
+void ObjcCategoryMerger::parseProtocolListInfo(
+    const ConcatInputSection *isec, uint32_t secOffset,
+    PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
          "Tried to read pointer list beyond protocol section end");
 
@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
   [[maybe_unused]] uint32_t expectedListSizeSwift =
       expectedListSize - target->wordSize;
 
-  assert((expectedListSize == ptrListSym->isec()->data.size() ||
-          expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+  assert(((expectedListSize == ptrListSym->isec()->data.size() &&
+           sourceLang == SourceObjC) ||
+          (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
+           sourceLang == SourceSwift)) &&
          "Protocol list does not match expected size");
 
   uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
 // Parse a protocol list and return the PointerListInfo for it
 ObjcCategoryMerger::PointerListInfo
 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
-                                          uint32_t secOffset) {
+                                          uint32_t secOffset,
+                                          SourceLanguage sourceLang) {
   PointerListInfo ptrList;
-  parseProtocolListInfo(isec, secOffset, ptrList);
+  parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
   return ptrList;
 }
 
@@ -772,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
 
   // is this the first category we are parsing?
-  if (extInfo.mergedContainerName.empty())
+  if (extInfo.mergedContainerName.empty()) {
     extInfo.objFileForMergeData =
         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
-  else
+  } else
     extInfo.mergedContainerName += "|";
 
   assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
                        extInfo.classMethods);
 
   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, catInfo.sourceLanguage);
 
   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
                        extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
       if (nlCategories.count(categorySym))
         continue;
 
-      assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
-             categorySym->getName().starts_with(
-                 objc::symbol_names::swift_objc_category));
-
       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
       assert(catBodyIsec &&
              "Category data section is not an ConcatInputSection");
 
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      if (categorySym->getName().starts_with(objc::symbol_names::category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+      else if (categorySym->getName().starts_with(
+                   objc::symbol_names::swift_objc_category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+      else
+        llvm_unreachable("Unexpected category symbol name");
+
       // Check that the category has a reloc at 'klassOffset' (which is
       // a pointer to the class symbol)
 
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
 
   // Collect all the info from the categories
   ClassExtensionInfo extInfo(catLayout);
+  extInfo.baseClass = baseClass;
+
+  if (baseClass->getName().starts_with(objc::symbol_names::klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+  else if (baseClass->getName().starts_with(
+               objc::symbol_names::swift_objc_klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+  else
+    llvm_unreachable("Unexpected base class symbol name");
+
   for (auto &catInfo : categories) {
     parseCatInfoToExtInfo(catInfo, extInfo);
   }
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
   // Protocol lists are a special case - the same protocol list is in classRo
   // and metaRo, so we only need to parse it once
   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, extInfo.baseClassSourceLanguage);
 
   // Check that the classRo and metaRo protocol lists are identical
-  assert(
-      parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
-          parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
-      "Category merger expects classRo and metaRo to have the same protocol "
-      "list");
+  assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+                               extInfo.baseClassSourceLanguage) ==
+             parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+                                   extInfo.baseClassSourceLanguage) &&
+         "Category merger expects classRo and metaRo to have the same protocol "
+         "list");
 
   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
                        extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
 constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
 
 constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
 } // namespace symbol_names
 
 // Check for duplicate method names within related categories / classes.

``````````

</details>


https://github.com/llvm/llvm-project/pull/95473


More information about the llvm-commits mailing list