[lld] [lld-macho][NFC] Track category merger input data source language for better verification (PR #95473)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 15:21:23 PDT 2024
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/95473
>From 64fbb876b5a9ae9b79da6dbfafe76e33cddf94ef Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 13 Jun 2024 14:03:52 -0700
Subject: [PATCH 1/3] [lld-macho][NFC] Track category merger input data source
language for better checking
---
lld/MachO/ObjC.cpp | 74 +++++++++++++++++++++++++++++++---------------
lld/MachO/ObjC.h | 1 +
2 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..a277c18c7b29f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
namespace {
class ObjcCategoryMerger {
+ // In which language was a particular construct originally defined
+ enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
// Information about an input category
struct InfoInputCategory {
ConcatInputSection *catListIsec;
ConcatInputSection *catBodyIsec;
uint32_t offCatListIsec = 0;
+ SourceLanguage sourceLanguage = SourceUnknown;
bool wasMerged = false;
};
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
// Merged names of containers. Ex: base|firstCategory|secondCategory|...
std::string mergedContainerName;
std::string baseClassName;
- Symbol *baseClass = nullptr;
+ const Symbol *baseClass = nullptr;
+ SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
CategoryLayout &catLayout;
// In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
ClassExtensionInfo &extInfo);
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
- PointerListInfo &ptrList);
+ PointerListInfo &ptrList,
+ SourceLanguage sourceLang);
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset);
+ uint32_t secOffset,
+ SourceLanguage sourceLang);
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
// Parse a protocol list that might be linked to ConcatInputSection at a given
// offset. The format of the protocol list is different than other lists (prop
// lists, method lists) so we need to parse it differently
-void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset,
- PointerListInfo &ptrList) {
+void ObjcCategoryMerger::parseProtocolListInfo(
+ const ConcatInputSection *isec, uint32_t secOffset,
+ PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
"Tried to read pointer list beyond protocol section end");
@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
[[maybe_unused]] uint32_t expectedListSizeSwift =
expectedListSize - target->wordSize;
- assert((expectedListSize == ptrListSym->isec()->data.size() ||
- expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+ assert(((expectedListSize == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceObjC) ||
+ (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceSwift)) &&
"Protocol list does not match expected size");
uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// Parse a protocol list and return the PointerListInfo for it
ObjcCategoryMerger::PointerListInfo
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset) {
+ uint32_t secOffset,
+ SourceLanguage sourceLang) {
PointerListInfo ptrList;
- parseProtocolListInfo(isec, secOffset, ptrList);
+ parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
return ptrList;
}
@@ -772,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
// is this the first category we are parsing?
- if (extInfo.mergedContainerName.empty())
+ if (extInfo.mergedContainerName.empty()) {
extInfo.objFileForMergeData =
dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
- else
+ } else
extInfo.mergedContainerName += "|";
assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
extInfo.classMethods);
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, catInfo.sourceLanguage);
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (nlCategories.count(categorySym))
continue;
- assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
- categorySym->getName().starts_with(
- objc::symbol_names::swift_objc_category));
-
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
+ InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+ if (categorySym->getName().starts_with(objc::symbol_names::category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+ else if (categorySym->getName().starts_with(
+ objc::symbol_names::swift_objc_category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected category symbol name");
+
// Check that the category has a reloc at 'klassOffset' (which is
// a pointer to the class symbol)
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
assert(classSym && "Category does not have a valid base class");
- InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
categoryMap[classSym].push_back(catInputInfo);
collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Collect all the info from the categories
ClassExtensionInfo extInfo(catLayout);
+ extInfo.baseClass = baseClass;
+
+ if (baseClass->getName().starts_with(objc::symbol_names::klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+ else if (baseClass->getName().starts_with(
+ objc::symbol_names::swift_objc_klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected base class symbol name");
+
for (auto &catInfo : categories) {
parseCatInfoToExtInfo(catInfo, extInfo);
}
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Protocol lists are a special case - the same protocol list is in classRo
// and metaRo, so we only need to parse it once
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, extInfo.baseClassSourceLanguage);
// Check that the classRo and metaRo protocol lists are identical
- assert(
- parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
- parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
- "Category merger expects classRo and metaRo to have the same protocol "
- "list");
+ assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) ==
+ parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) &&
+ "Category merger expects classRo and metaRo to have the same protocol "
+ "list");
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
} // namespace symbol_names
// Check for duplicate method names within related categories / classes.
>From 627127f72a98829746f9e785317c747458ca10f6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 14 Jun 2024 04:42:38 -0700
Subject: [PATCH 2/3] Address Feedback nr.1
---
lld/MachO/ObjC.cpp | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index a277c18c7b29f..3e955c57092de 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -350,14 +350,14 @@ namespace {
class ObjcCategoryMerger {
// In which language was a particular construct originally defined
- enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+ enum SourceLanguage { ObjC, Swift, Unknown };
// Information about an input category
struct InfoInputCategory {
ConcatInputSection *catListIsec;
ConcatInputSection *catBodyIsec;
uint32_t offCatListIsec = 0;
- SourceLanguage sourceLanguage = SourceUnknown;
+ SourceLanguage sourceLanguage = SourceLanguage::Unknown;
bool wasMerged = false;
};
@@ -418,7 +418,7 @@ class ObjcCategoryMerger {
std::string mergedContainerName;
std::string baseClassName;
const Symbol *baseClass = nullptr;
- SourceLanguage baseClassSourceLanguage = SourceUnknown;
+ SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
CategoryLayout &catLayout;
@@ -693,9 +693,9 @@ void ObjcCategoryMerger::parseProtocolListInfo(
expectedListSize - target->wordSize;
assert(((expectedListSize == ptrListSym->isec()->data.size() &&
- sourceLang == SourceObjC) ||
+ sourceLang == SourceLanguage::ObjC) ||
(expectedListSizeSwift == ptrListSym->isec()->data.size() &&
- sourceLang == SourceSwift)) &&
+ sourceLang == SourceLanguage::Swift)) &&
"Protocol list does not match expected size");
uint32_t off = protocolListHeaderLayout.totalSize;
@@ -783,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
// is this the first category we are parsing?
- if (extInfo.mergedContainerName.empty()) {
+ if (extInfo.mergedContainerName.empty())
extInfo.objFileForMergeData =
dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
- } else
+ else
extInfo.mergedContainerName += "|";
assert(extInfo.objFileForMergeData &&
@@ -1168,10 +1168,10 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
if (categorySym->getName().starts_with(objc::symbol_names::category))
- catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+ catInputInfo.sourceLanguage = SourceLanguage::ObjC;
else if (categorySym->getName().starts_with(
objc::symbol_names::swift_objc_category))
- catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+ catInputInfo.sourceLanguage = SourceLanguage::Swift;
else
llvm_unreachable("Unexpected category symbol name");
@@ -1384,10 +1384,10 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
extInfo.baseClass = baseClass;
if (baseClass->getName().starts_with(objc::symbol_names::klass))
- extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+ extInfo.baseClassSourceLanguage = SourceLanguage::ObjC;
else if (baseClass->getName().starts_with(
objc::symbol_names::swift_objc_klass))
- extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+ extInfo.baseClassSourceLanguage = SourceLanguage::Swift;
else
llvm_unreachable("Unexpected base class symbol name");
>From 62d9f4c511cb22ef472d03a8dcecce5b0a5bc90f Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Mon, 17 Jun 2024 15:21:09 -0700
Subject: [PATCH 3/3] Address feedback nr.2
---
lld/MachO/ObjC.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 3e955c57092de..413fa0bb64390 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -350,7 +350,7 @@ namespace {
class ObjcCategoryMerger {
// In which language was a particular construct originally defined
- enum SourceLanguage { ObjC, Swift, Unknown };
+ enum SourceLanguage { Unknown, ObjC, Swift };
// Information about an input category
struct InfoInputCategory {
@@ -1166,15 +1166,17 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
- InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+ SourceLanguage eLang = SourceLanguage::Unknown;
if (categorySym->getName().starts_with(objc::symbol_names::category))
- catInputInfo.sourceLanguage = SourceLanguage::ObjC;
+ eLang = SourceLanguage::ObjC;
else if (categorySym->getName().starts_with(
objc::symbol_names::swift_objc_category))
- catInputInfo.sourceLanguage = SourceLanguage::Swift;
+ eLang = SourceLanguage::Swift;
else
llvm_unreachable("Unexpected category symbol name");
+ InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
+
// Check that the category has a reloc at 'klassOffset' (which is
// a pointer to the class symbol)
More information about the llvm-commits
mailing list