[lld] [lld-macho][NFC] Track category merger input data source language for better verification (PR #95473)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 14:09:24 PDT 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/95473

>From 4b020ccfa6a7da6927a0e38e4d8c53d20a383b0b Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 13 Jun 2024 14:03:52 -0700
Subject: [PATCH] [lld-macho][NFC] Track category merger input data source
 language for better checking

---
 lld/MachO/ObjC.cpp | 70 +++++++++++++++++++++++++++++++---------------
 lld/MachO/ObjC.h   |  1 +
 2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..f0fde4da8a764 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
 namespace {
 
 class ObjcCategoryMerger {
+  // The language in which a particular construct was originally defined.
+  enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
   // Information about an input category
   struct InfoInputCategory {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    SourceLanguage sourceLanguage = SourceUnknown;
 
     bool wasMerged = false;
   };
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
     std::string mergedContainerName;
     std::string baseClassName;
-    Symbol *baseClass = nullptr;
+    const Symbol *baseClass = nullptr;
+    SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
     CategoryLayout &catLayout;
 
     // In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
                              ClassExtensionInfo &extInfo);
 
   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
-                             PointerListInfo &ptrList);
+                             PointerListInfo &ptrList,
+                             SourceLanguage sourceLang);
 
   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
-                                        uint32_t secOffset);
+                                        uint32_t secOffset,
+                                        SourceLanguage sourceLang);
 
   void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
                             PointerListInfo &ptrList);
@@ -655,7 +663,8 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
 // lists, method lists) so we need to parse it differently
 void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
                                                uint32_t secOffset,
-                                               PointerListInfo &ptrList) {
+                                               PointerListInfo &ptrList,
+                                               SourceLanguage sourceLang) {
   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
          "Tried to read pointer list beyond protocol section end");
 
@@ -684,8 +693,8 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
   [[maybe_unused]] uint32_t expectedListSizeSwift =
       expectedListSize - target->wordSize;
 
-  assert((expectedListSize == ptrListSym->isec()->data.size() ||
-          expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+  assert(((expectedListSize == ptrListSym->isec()->data.size() && sourceLang == SourceObjC) ||
+          (expectedListSizeSwift == ptrListSym->isec()->data.size() && sourceLang == SourceSwift)) &&
          "Protocol list does not match expected size");
 
   uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +717,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
 // Parse a protocol list and return the PointerListInfo for it
 ObjcCategoryMerger::PointerListInfo
 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
-                                          uint32_t secOffset) {
+                                          uint32_t secOffset,
+                                          SourceLanguage sourceLang) {
   PointerListInfo ptrList;
-  parseProtocolListInfo(isec, secOffset, ptrList);
+  parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
   return ptrList;
 }
 
@@ -772,10 +782,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
 
   // is this the first category we are parsing?
-  if (extInfo.mergedContainerName.empty())
+  if (extInfo.mergedContainerName.empty()) {
+    auto *theFile = catInfo.catBodyIsec->getFile();
     extInfo.objFileForMergeData =
         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
-  else
+  } else
     extInfo.mergedContainerName += "|";
 
   assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
                        extInfo.classMethods);
 
   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, catInfo.sourceLanguage);
 
   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
                        extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
       if (nlCategories.count(categorySym))
         continue;
 
-      assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
-             categorySym->getName().starts_with(
-                 objc::symbol_names::swift_objc_category));
-
       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
       assert(catBodyIsec &&
              "Category data section is not an ConcatInputSection");
 
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      if (categorySym->getName().starts_with(objc::symbol_names::category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+      else if (categorySym->getName().starts_with(
+                   objc::symbol_names::swift_objc_category))
+        catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+      else
+        llvm_unreachable("Unexpected category symbol name");
+
       // Check that the category has a reloc at 'klassOffset' (which is
       // a pointer to the class symbol)
 
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
 
   // Collect all the info from the categories
   ClassExtensionInfo extInfo(catLayout);
+  extInfo.baseClass = baseClass;
+
+  if (baseClass->getName().starts_with(objc::symbol_names::klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+  else if (baseClass->getName().starts_with(
+               objc::symbol_names::swift_objc_klass))
+    extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+  else
+    llvm_unreachable("Unexpected base class symbol name");
+
   for (auto &catInfo : categories) {
     parseCatInfoToExtInfo(catInfo, extInfo);
   }
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
   // Protocol lists are a special case - the same protocol list is in classRo
   // and metaRo, so we only need to parse it once
   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
-                        extInfo.protocols);
+                        extInfo.protocols, extInfo.baseClassSourceLanguage);
 
   // Check that the classRo and metaRo protocol lists are identical
-  assert(
-      parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
-          parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
-      "Category merger expects classRo and metaRo to have the same protocol "
-      "list");
+  assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+                               extInfo.baseClassSourceLanguage) ==
+             parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+                                   extInfo.baseClassSourceLanguage) &&
+         "Category merger expects classRo and metaRo to have the same protocol "
+         "list");
 
   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
                        extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
 constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
 
 constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
 } // namespace symbol_names
 
 // Check for duplicate method names within related categories / classes.



More information about the llvm-commits mailing list