[lld] [lld-macho] Implement ObjC category merging (-objc_category_merging) (PR #82928)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 11:19:33 PST 2024
================
@@ -320,3 +332,915 @@ void objc::checkCategories() {
}
}
}
+
+namespace {
+
+// Implements ObjC category merging (-objc_category_merging). The class
+// collects per-category information from the input sections it is given,
+// groups it by base class symbol (see categoryMap) and exposes doMerge() as
+// the entry point.
+class ObjcCategoryMerger {
+  // Information about an input category
+  struct InfoInputCategory {
+    // Pointers default to null so a default-constructed entry never carries
+    // indeterminate values.
+    ConcatInputSection *catBodyIsec = nullptr;
+    ConcatInputSection *catListIsec = nullptr;
+    uint32_t offCatListIsec = 0;
+
+    bool wasMerged = false;
+  };
+
+  // To write new (merged) categories or classes, we try to make limited
+  // assumptions about the alignment and the sections that the various
+  // class/category info is stored in. So we just reuse the same sections and
+  // alignment as already used in existing (input) categories. To do this we
+  // have InfoCategoryWriter which contains the various sections that the
+  // generated categories will be written to.
+  template <typename T> struct InfroWriteSection {
+    bool valid = false; // Data has been successfully collected from input
+    uint32_t align = 0;
+    // Null until collectSectionWriteInfoFromIsec fills the entry in; check
+    // `valid` before relying on any of the fields below.
+    const Section *inputSection = nullptr;
+    Reloc relocTemplate;
+    T *outputSection = nullptr;
+  };
+
+  struct InfoCategoryWriter {
+    InfroWriteSection<ConcatOutputSection> catListInfo;
+    InfroWriteSection<ConcatOutputSection> catBodyInfo;
+    InfroWriteSection<CStringSection> catNameInfo;
+    InfroWriteSection<ConcatOutputSection> catPtrListInfo;
+  };
+
+  // Information about a pointer list in the original categories (method lists,
+  // protocol lists, etc)
+  struct PointerListInfo {
+    // Intentionally not explicit: ClassExtensionInfo copy-initializes its
+    // members directly from the symbol-name prefixes.
+    PointerListInfo(const char *pszSymNamePrefix)
+        : namePrefix(pszSymNamePrefix) {}
+    const char *namePrefix;
+
+    uint32_t structSize = 0;
+    uint32_t structCount = 0;
+
+    std::vector<Symbol *> allPtrs;
+  };
+
+  // Full information about all the categories that are extending a class. This
+  // will have all the additional methods, protocols, properties that are
+  // contained in all the categories that extend a particular class.
+  struct ClassExtensionInfo {
+    // Merged names of containers. Ex: base|firstCategory|secondCategory|...
+    std::string mergedContainerName;
+    std::string baseClassName;
+    Symbol *baseClass = nullptr;
+    // In case we generate new data, mark the new data as belonging to this file
+    ObjFile *objFileForMergeData = nullptr;
+
+    PointerListInfo instanceMethods =
+        objc::symbol_names::categoryInstanceMethods;
+    PointerListInfo classMethods = objc::symbol_names::categoryClassMethods;
+    PointerListInfo protocols = objc::symbol_names::categoryProtocols;
+    PointerListInfo instanceProps = objc::symbol_names::listProprieties;
+    PointerListInfo classProps = objc::symbol_names::klassPropList;
+  };
+
+public:
+  // Stores a reference to _allInputSections; the vector must outlive this
+  // object.
+  ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
+  void doMerge();
+  static void doCleanup();
+
+private:
+  void collectAndValidateCategoriesData();
+  void
+  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+
+  void eraseISec(ConcatInputSection *isec);
+  void eraseMergedCategories();
+
+  void generateCatListForNonErasedCategories(
+      std::map<ConcatInputSection *, std::set<uint64_t>>
+          catListToErasedOffsets);
+  // Fills catWriteInfo (section, alignment, output section, reloc template)
+  // from isec and marks it valid.
+  template <typename T>
+  void collectSectionWriteInfoFromIsec(InputSection *isec,
+                                       InfroWriteSection<T> &catWriteInfo);
+  void collectCategoryWriterInfoFromCategory(InfoInputCategory &catInfo);
+  void parseCatInfoToExtInfo(InfoInputCategory &catInfo,
+                             ClassExtensionInfo &extInfo);
+
+  // secOffset is the offset inside isec at which the protocol list pointer is
+  // looked up (parameter name matches the out-of-class definition).
+  void parseProtocolListInfo(ConcatInputSection *isec,
+                             uint32_t secOffset,
+                             PointerListInfo &ptrList);
+
+  void parsePointerListInfo(ConcatInputSection *isec, uint32_t secOffset,
+                            uint32_t symbolsPerStruct,
+                            PointerListInfo &ptrList);
+
+  void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
+                              ClassExtensionInfo &extInfo,
+                              PointerListInfo &ptrList);
+
+  void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+                               ClassExtensionInfo &extInfo,
+                               PointerListInfo &ptrList);
+
+  void emitCategory(ClassExtensionInfo &extInfo, Defined *&catBodySym);
+  void emitCatListEntrySec(std::string &forCateogryName,
+                           std::string &forBaseClassName, ObjFile *objFile,
+                           Defined *&catListSym);
+  void emitCategoryBody(std::string &name, Defined *nameSym,
+                        Symbol *baseClassSym, std::string &baseClassName,
+                        ObjFile *objFile, Defined *&catBodySym);
+  void emitCategoryName(std::string &name, ObjFile *objFile,
+                        Defined *&catNameSym);
+  void createSymbolReference(Defined *refFrom, Symbol *refTo, uint32_t offset,
+                             Reloc &relocTemplate);
+  // The try* helpers report "nothing found" through their return value (or by
+  // doing nothing) rather than treating it as an error.
+  bool tryGetSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                Symbol *&sym);
+  bool tryGetDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                 Defined *&defined);
+  void tryEraseDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+                                   bool stringOnly = false);
+
+  // Layout descriptions, all constructed from the target word size.
+  CategoryLayout catLayout;
+  ClassLayout classLayout;
+  ROClassLayout roClassLayout;
+  ListHeaderLayout listHeaderLayout;
+  MethodLayout methodLayout;
+  ProtocolListHeaderLayout protocolListHeaderLayout;
+
+  InfoCategoryWriter infoCategoryWriter;
+  std::vector<ConcatInputSection *> &allInputSections;
+  // Map of base class Symbol to list of InfoInputCategory's for it
+  std::map<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+
+  // Normally, the binary data comes from the input files, but since we're
+  // generating binary data ourselves, we use the below arrays to store it in.
+  // Need this to be 'static' so the data survives past the ObjcCategoryMerger
+  // object, as the data will be read by the Writer when the final binary is
+  // generated.
+  static SmallVector<SmallString<0>> generatedNames;
+  static SmallVector<SmallVector<uint8_t>> generatedSectionData;
+};
+
+// Out-of-class definitions of the static data members declared in
+// ObjcCategoryMerger (generatedNames and generatedSectionData).
+SmallVector<SmallString<0>> ObjcCategoryMerger::generatedNames;
+SmallVector<SmallVector<uint8_t>> ObjcCategoryMerger::generatedSectionData;
+
+// Constructs every layout member from target->wordSize and binds
+// allInputSections to the caller's vector (a reference, not a copy).
+ObjcCategoryMerger::ObjcCategoryMerger(
+ std::vector<ConcatInputSection *> &_allInputSections)
+ : catLayout(target->wordSize), classLayout(target->wordSize),
+ roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
+ methodLayout(target->wordSize),
+ protocolListHeaderLayout(target->wordSize),
+ allInputSections(_allInputSections) {}
+
+// Records, in catWriteInfo, the properties of isec that are reused later when
+// writing generated data: its Section, its alignment, its parent cast to T and
+// (when isec has any relocs) its first reloc as a template.
+// Templated so the same code serves both CStringSection and
+// ConcatOutputSection parents.
+template <typename T>
+void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
+    InputSection *isec, InfroWriteSection<T> &catWriteInfo) {
+  // The parent must exist and be of type T.
+  catWriteInfo.outputSection = dyn_cast_or_null<T>(isec->parent);
+  assert(catWriteInfo.outputSection &&
+         "outputSection may not be null in collectSectionWriteInfoFromIsec.");
+
+  catWriteInfo.align = isec->align;
+  catWriteInfo.inputSection = &isec->section;
+
+  // relocTemplate keeps its previous value when isec has no relocs.
+  if (!isec->relocs.empty())
+    catWriteInfo.relocTemplate = isec->relocs.front();
+
+  catWriteInfo.valid = true;
+}
+
+// Looks up the reloc at `offset` inside `isec` and, if its referent is a
+// Symbol, stores that symbol in `sym`.
+// Returns true only when a non-null Symbol was found. `sym` is left untouched
+// when there is no reloc at `offset`, and is set to nullptr when the reloc's
+// referent is not a Symbol.
+bool ObjcCategoryMerger::tryGetSymbolAtIsecOffset(ConcatInputSection *isec,
+                                                  uint32_t offset,
+                                                  Symbol *&sym) {
+  const Reloc *reloc = isec->getRelocAt(offset);
+  if (!reloc)
+    return false;
+
+  // dyn_cast (not get): a referent that is not a Symbol must make this "try"
+  // helper return false instead of failing a type assertion. This matches how
+  // collectCategoryWriterInfoFromCategory reads referents.
+  sym = reloc->referent.dyn_cast<Symbol *>();
+  return sym != nullptr;
+}
+
+// Like tryGetSymbolAtIsecOffset, but additionally requires the symbol found at
+// `offset` to be a Defined. On success stores it in `defined` and returns
+// true; returns false when no symbol is found or it is not a Defined.
+bool ObjcCategoryMerger::tryGetDefinedAtIsecOffset(ConcatInputSection *isec,
+                                                   uint32_t offset,
+                                                   Defined *&defined) {
+  Symbol *foundSym = nullptr;
+  if (tryGetSymbolAtIsecOffset(isec, offset, foundSym)) {
+    defined = dyn_cast_or_null<Defined>(foundSym);
+    return defined != nullptr;
+  }
+
+  // No symbol at this offset; `defined` is intentionally not written.
+  return false;
+}
+
+// Given a ConcatInputSection and an offset, if there is a symbol (Defined) at
+// that offset, then erase the symbol from the final output.
+// Used for easily erasing already merged strings, method lists, etc ...
+//
+// - If the symbol lives in a ConcatInputSection and stringOnly is false, the
+//   whole section is handed to eraseISec.
+// - If the symbol lives in a CStringInputSection, the string piece it refers
+//   to is marked not live.
+// - In every other case (no reloc, referent not a Defined, stringOnly set for
+//   a non-string section) nothing happens.
+void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(ConcatInputSection *isec,
+                                                     uint32_t offset,
+                                                     bool stringOnly) {
+  const Reloc *reloc = isec->getRelocAt(offset);
+  if (!reloc)
+    return;
+
+  // dyn_cast (not get): a referent that is not a Symbol is simply skipped by
+  // this "try" helper rather than failing a type assertion.
+  auto *sym = dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
+  if (!sym)
+    return;
+
+  if (!stringOnly) {
+    if (auto *concatIsec = dyn_cast_or_null<ConcatInputSection>(sym->isec)) {
+      eraseISec(concatIsec);
+      return;
+    }
+  }
+
+  if (auto *cstrIsec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+    // The piece is located by the symbol's value plus the reloc addend.
+    uint32_t totalOffset = sym->value + reloc->addend;
+    StringPiece &piece = cstrIsec->getStringPiece(totalOffset);
+    piece.live = false;
+  }
+}
+
+// Populates infoCategoryWriter using catInfo as the model category.
+// The category list and category body entries are refreshed on every call;
+// the category name and pointer list entries are collected only while they
+// are not yet marked valid.
+void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
+    InfoInputCategory &catInfo) {
+  collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+      catInfo.catListIsec, infoCategoryWriter.catListInfo);
+  collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+      catInfo.catBodyIsec, infoCategoryWriter.catBodyInfo);
+
+  // Category name: follow the reloc at nameOffset to the symbol that holds
+  // the name and take the write info from that symbol's section.
+  if (!infoCategoryWriter.catNameInfo.valid) {
+    const Reloc *nameReloc =
+        catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
+    assert(nameReloc && "Category does not have a reloc at nameOffset");
+
+    auto *nameSym =
+        dyn_cast_or_null<Defined>(nameReloc->referent.dyn_cast<Symbol *>());
+    assert(nameSym && "Reloc of category name is not a valid Defined symbol");
+
+    collectSectionWriteInfoFromIsec<CStringSection>(
+        nameSym->isec, infoCategoryWriter.catNameInfo);
+  }
+
+  // Pointer lists: nothing left to do once one has been collected.
+  if (infoCategoryWriter.catPtrListInfo.valid)
+    return;
+
+  // Walk the word-sized slots from instanceMethodsOffset up to and including
+  // classPropsOffset. The first slot that references a Defined supplies the
+  // write info (all lists are assumed to provide the same info).
+  for (uint32_t slotOff = catLayout.instanceMethodsOffset;
+       slotOff <= catLayout.classPropsOffset; slotOff += target->wordSize) {
+    Defined *listSym;
+    if (!tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, slotOff, listSym))
+      continue;
+
+    collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+        listSym->isec, infoCategoryWriter.catPtrListInfo);
+    break;
+  }
+}
+
+// Parse a protocol list that might be linked to at a ConcatInputSection given
+// offset. The format of the protocol list is different than other lists (prop
+// lists, method lists) so we need to parse it differently.
+//
+// isec/secOffset locate the word that may hold a reloc to the protocol list.
+// When there is no reloc at that offset the list is treated as null and
+// ptrList is left untouched. Otherwise ptrList.structCount grows by the number
+// of protocols, ptrList.structSize is set to the target word size, and every
+// protocol symbol is appended to ptrList.allPtrs.
+void ObjcCategoryMerger::parseProtocolListInfo(ConcatInputSection *isec,
+                                               uint32_t secOffset,
+                                               PointerListInfo &ptrList) {
+  // The condition must be part of the assert expression itself: asserting on
+  // a bare string literal is always true and checks nothing.
+  assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
+         "Tried to read pointer list beyond protocol section end");
+
+  const Reloc *reloc = isec->getRelocAt(secOffset);
+  if (!reloc)
+    return; // List is null, nothing to do
+
+  auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
+
+  // Theoretically protocol count can be either 32b or 64b, but reading the
+  // first 32b is good enough
+  // NOTE(review): the count is read at listHeaderLayout.structSizeOffset, not
+  // at an offset taken from protocolListHeaderLayout - confirm the two agree.
+  uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
+      ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+
+  ptrList.structCount += protocolCount;
+  ptrList.structSize = target->wordSize;
+
+  uint32_t expectedListSize =
+      (protocolCount * target->wordSize) +
+      /*header(count)*/ protocolListHeaderLayout.totalSize +
+      /*extra null value*/ target->wordSize;
+  assert(expectedListSize == ptrListSym->isec->data.size() &&
+         "Protocol list does not match expected size");
+
+  // The protocol pointers follow the header, one word each.
+  uint32_t off = protocolListHeaderLayout.totalSize;
+  for (uint32_t inx = 0; inx < protocolCount; inx++) {
+    // Named distinctly so it does not shadow the outer `reloc`.
+    const Reloc *protocolReloc = ptrListSym->isec->getRelocAt(off);
+    assert(protocolReloc && "No reloc found at protocol list offset");
+
+    auto *listSym =
+        dyn_cast_or_null<Defined>(protocolReloc->referent.get<Symbol *>());
+    assert(listSym && "Protocol list reloc does not have a valid Defined");
+
+    ptrList.allPtrs.push_back(listSym);
+    off += target->wordSize;
+  }
+}
+
+// Parse a pointer list that might be linked to at a ConcatInputSection given
+// offset. This can be used for instance methods, class methods, instance props
+// and class props since they have the same format.
+void ObjcCategoryMerger::parsePointerListInfo(ConcatInputSection *isec,
----------------
kyulee-com wrote:
`const ConcatInputSection *isec,` instead of `ConcatInputSection *isec,`.
Nit: can you also switch the parameter order so that the output `ptrList` comes first?
https://github.com/llvm/llvm-project/pull/82928
More information about the llvm-commits
mailing list