[lld] [llvm] [lld-macho] Implement ObjC category merging (-objc_category_merging) (PR #82928)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 21:40:55 PST 2024
================
@@ -320,3 +332,914 @@ void objc::checkCategories() {
}
}
}
+
+namespace {
+
+// Merges multiple ObjC categories that extend the same base class into a single
+// generated category. The helper structs below describe the parsed input
+// categories, the sections the generated data is written to, and the combined
+// method/protocol/property lists.
+class ObjcCategoryMerger {
+  // Information about an input category
+  struct InfoInputCategory {
+    // Pointers default to null so a default-constructed entry is never read as
+    // garbage.
+    ConcatInputSection *catListIsec = nullptr;
+    ConcatInputSection *catBodyIsec = nullptr;
+    uint32_t offCatListIsec = 0;
+
+    bool wasMerged = false;
+  };
+
+  // To write new (merged) categories or classes, we try to make limited
+  // assumptions about the alignment and the sections the various
+  // class/category info are stored in. So we just reuse the same sections and
+  // alignment as already used in existing (input) categories. To do this we
+  // have InfoCategoryWriter which contains the various sections that the
+  // generated categories will be written to.
+  template <typename T> struct InfoWriteSection {
+    bool valid = false; // Data has been successfully collected from input
+    uint32_t align = 0;
+    Section *inputSection = nullptr;
+    Reloc relocTemplate;
+    T *outputSection = nullptr;
+  };
+
+  struct InfoCategoryWriter {
+    InfoWriteSection<ConcatOutputSection> catListInfo;
+    InfoWriteSection<ConcatOutputSection> catBodyInfo;
+    InfoWriteSection<CStringSection> catNameInfo;
+    InfoWriteSection<ConcatOutputSection> catPtrListInfo;
+  };
+
+  // Information about a pointer list in the original categories (method lists,
+  // protocol lists, etc)
+  struct PointerListInfo {
+    PointerListInfo(const char *_categoryPrefix, uint32_t _categoryOffset,
+                    uint32_t _pointersPerStruct)
+        : categoryPrefix(_categoryPrefix), categoryOffset(_categoryOffset),
+          pointersPerStruct(_pointersPerStruct) {}
+    // Prefix used when naming the symbol of a generated list.
+    const char *categoryPrefix;
+    // Offset of this list's pointer inside the category body.
+    uint32_t categoryOffset = 0;
+
+    // Number of pointers in each list entry (0 for protocol lists).
+    uint32_t pointersPerStruct = 0;
+
+    uint32_t structSize = 0;
+    uint32_t structCount = 0;
+
+    // Every symbol referenced by the parsed lists, in parse order.
+    std::vector<Symbol *> allPtrs;
+  };
+
+  // Full information about all the categories that extend a class. This will
+  // include all the additional methods, protocols, and properties that are
+  // contained in all the categories that extend a particular class.
+  struct ClassExtensionInfo {
+    ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout) {}
+
+    // Merged names of containers. Ex: base|firstCategory|secondCategory|...
+    std::string mergedContainerName;
+    std::string baseClassName;
+    Symbol *baseClass = nullptr;
+    // Declared before the PointerListInfo members below because their default
+    // initializers read offsets from it.
+    CategoryLayout &catLayout;
+
+    // In case we generate new data, mark the new data as belonging to this file
+    ObjFile *objFileForMergeData = nullptr;
+
+    PointerListInfo instanceMethods = {
+        objc::symbol_names::categoryInstanceMethods,
+        /*_categoryOffset=*/catLayout.instanceMethodsOffset,
+        /*pointersPerStruct=*/3};
+    PointerListInfo classMethods = {
+        objc::symbol_names::categoryClassMethods,
+        /*_categoryOffset=*/catLayout.classMethodsOffset,
+        /*pointersPerStruct=*/3};
+    PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
+                                 /*_categoryOffset=*/catLayout.protocolsOffset,
+                                 /*pointersPerStruct=*/0};
+    PointerListInfo instanceProps = {
+        objc::symbol_names::listProprieties,
+        /*_categoryOffset=*/catLayout.instancePropsOffset,
+        /*pointersPerStruct=*/2};
+    PointerListInfo classProps = {
+        objc::symbol_names::klassPropList,
+        /*_categoryOffset=*/catLayout.classPropsOffset,
+        /*pointersPerStruct=*/2};
+  };
+
+public:
+  ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
+  void doMerge();
+  static void doCleanup();
+
+private:
+  void collectAndValidateCategoriesData();
+  void
+  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+
+  void eraseISec(ConcatInputSection *isec);
+  void eraseMergedCategories();
+
+  // NOTE(review): the map is taken by value, which copies it on every call.
+  // Consider a const reference if the definition does not modify it.
+  void generateCatListForNonErasedCategories(
+      std::map<ConcatInputSection *, std::set<uint64_t>>
+          catListToErasedOffsets);
+  template <typename T>
+  void collectSectionWriteInfoFromIsec(const InputSection *isec,
+                                       InfoWriteSection<T> &catWriteInfo);
+  void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
+  void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
+                             ClassExtensionInfo &extInfo);
+
+  void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
+                             PointerListInfo &ptrList);
+
+  void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
+                            PointerListInfo &ptrList);
+
+  void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
+                              const ClassExtensionInfo &extInfo,
+                              const PointerListInfo &ptrList);
+
+  void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+                               const ClassExtensionInfo &extInfo,
+                               const PointerListInfo &ptrList);
+
+  Defined *emitCategory(const ClassExtensionInfo &extInfo);
+  Defined *emitCatListEntrySec(const std::string &forCategoryName,
+                               const std::string &forBaseClassName,
+                               ObjFile *objFile);
+  Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
+                            const Symbol *baseClassSym,
+                            const std::string &baseClassName, ObjFile *objFile);
+  Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
+  void createSymbolReference(Defined *refFrom, const Symbol *refTo,
+                             uint32_t offset, const Reloc &relocTemplate);
+  Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
+                                   uint32_t offset);
+  Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
+                                     uint32_t offset);
+  void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
+                                   uint32_t offset);
+
+  // Allocate a null-terminated StringRef backed by generatedSectionData
+  StringRef newStringData(const char *str);
+  // Allocate section data, backed by generatedSectionData
+  SmallVector<uint8_t> &newSectionData(uint32_t size);
+
+  // Layout descriptions, all constructed from the target word size.
+  CategoryLayout catLayout;
+  ClassLayout classLayout;
+  ROClassLayout roClassLayout;
+  ListHeaderLayout listHeaderLayout;
+  MethodLayout methodLayout;
+  ProtocolListHeaderLayout protocolListHeaderLayout;
+
+  InfoCategoryWriter infoCategoryWriter;
+  std::vector<ConcatInputSection *> &allInputSections;
+  // Map of base class Symbol to list of InfoInputCategory's for it
+  std::map<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+
+  // Normally, the binary data comes from the input files, but since we're
+  // generating binary data ourselves, we use the below array to store it in.
+  // Need this to be 'static' so the data survives past the ObjcCategoryMerger
+  // object, as the data will be read by the Writer when the final binary is
+  // generated.
+  // NOTE(review): the inner buffers are stored by value. If this outer vector
+  // reallocates, views handed out earlier into small (inline-stored) buffers
+  // may dangle - confirm how newSectionData() guards against that.
+  static SmallVector<SmallVector<uint8_t>> generatedSectionData;
+};
+
+// Out-of-class definition of the static storage declared in
+// ObjcCategoryMerger.
+SmallVector<SmallVector<uint8_t>> ObjcCategoryMerger::generatedSectionData;
+
+// Constructs every layout helper from target->wordSize and stores a reference
+// (not a copy) to the caller's vector of input sections.
+ObjcCategoryMerger::ObjcCategoryMerger(
+ std::vector<ConcatInputSection *> &_allInputSections)
+ : catLayout(target->wordSize), classLayout(target->wordSize),
+ roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
+ methodLayout(target->wordSize),
+ protocolListHeaderLayout(target->wordSize),
+ allInputSections(_allInputSections) {}
+
+// Records, from an existing input section, what is needed to later emit
+// generated data next to it: its Section, its alignment, its output section
+// (cast to T) and, when it has any relocations, the first one as a template.
+// Templated so it can serve both CStringSection and ConcatOutputSection.
+template <typename T>
+void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
+    const InputSection *isec, InfoWriteSection<T> &catWriteInfo) {
+  T *outSec = dyn_cast_or_null<T>(isec->parent);
+  assert(outSec &&
+         "outputSection may not be null in collectSectionWriteInfoFromIsec.");
+
+  catWriteInfo.outputSection = outSec;
+  catWriteInfo.align = isec->align;
+  catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
+
+  // Sections without relocations leave the template untouched.
+  if (!isec->relocs.empty())
+    catWriteInfo.relocTemplate = isec->relocs.front();
+
+  catWriteInfo.valid = true;
+}
+
+// Returns the Symbol referenced by the relocation found at `offset` in `isec`.
+// Returns nullptr when `isec` is null, when no relocation exists at that
+// offset, or when the relocation refers to something other than a Symbol.
+Symbol *
+ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
+                                             uint32_t offset) {
+  if (!isec)
+    return nullptr;
+
+  const Reloc *reloc = isec->getRelocAt(offset);
+  if (!reloc)
+    return nullptr;
+
+  // PointerUnion::get<> asserts if the union holds a different alternative;
+  // dyn_cast<> yields nullptr instead, which matches this "try" contract.
+  return reloc->referent.dyn_cast<Symbol *>();
+}
+
+// Same lookup as tryGetSymbolAtIsecOffset, narrowed to Defined: yields nullptr
+// when nothing is found or when the symbol found is not a Defined.
+Defined *
+ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
+                                              uint32_t offset) {
+  return dyn_cast_or_null<Defined>(tryGetSymbolAtIsecOffset(isec, offset));
+}
+
+// Given a ConcatInputSection and an offset, if the relocation at that offset
+// refers to a Defined symbol, erase what that symbol lives in: a
+// ConcatInputSection is passed to eraseISec(), while for a CStringInputSection
+// only the referenced string piece is marked not live. Does nothing when there
+// is no relocation at the offset or it does not refer to a Defined.
+void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
+    const ConcatInputSection *isec, uint32_t offset) {
+  const Reloc *reloc = isec->getRelocAt(offset);
+  if (!reloc)
+    return;
+
+  // dyn_cast<> (rather than get<>) keeps this a no-op, instead of an
+  // assertion failure, when the referent is not a Symbol.
+  auto *sym = dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
+  if (!sym)
+    return;
+
+  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec)) {
+    eraseISec(cisec);
+    return;
+  }
+
+  if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+    // The piece is located by the symbol's value plus the reloc's addend.
+    uint32_t totalOffset = sym->value + reloc->addend;
+    StringPiece &piece = csisec->getStringPiece(totalOffset);
+    piece.live = false;
+    return;
+  }
+
+  llvm_unreachable("erased symbol has to be in a ConcatInputSection or "
+                   "CStringInputSection");
+}
+
+// Fills in whichever parts of infoCategoryWriter are still not valid, using
+// the sections of the given input category as the source. Parts that were
+// already collected from an earlier category are left alone.
+void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
+    const InfoInputCategory &catInfo) {
+  InfoCategoryWriter &writer = infoCategoryWriter;
+
+  if (!writer.catListInfo.valid)
+    collectSectionWriteInfoFromIsec<ConcatOutputSection>(catInfo.catListIsec,
+                                                         writer.catListInfo);
+
+  if (!writer.catBodyInfo.valid)
+    collectSectionWriteInfoFromIsec<ConcatOutputSection>(catInfo.catBodyIsec,
+                                                         writer.catBodyInfo);
+
+  if (!writer.catNameInfo.valid) {
+    lld::macho::Defined *nameDef =
+        tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
+    assert(nameDef && "Category does not have a valid name Symbol");
+
+    collectSectionWriteInfoFromIsec<CStringSection>(nameDef->isec,
+                                                    writer.catNameInfo);
+  }
+
+  if (writer.catPtrListInfo.valid)
+    return;
+
+  // Walk the list slots of the category body one word at a time and take the
+  // writer info from the first list found (all lists are assumed to provide
+  // the same info).
+  uint32_t off = catLayout.instanceMethodsOffset;
+  while (off <= catLayout.classPropsOffset) {
+    Defined *listDef = tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off);
+    if (listDef) {
+      collectSectionWriteInfoFromIsec<ConcatOutputSection>(
+          listDef->isec, writer.catPtrListInfo);
+      return;
+    }
+    off += target->wordSize;
+  }
+}
+
+// Parse a protocol list that might be linked to ConcatInputSection at a given
+// offset. The format of the protocol list is different than other lists (prop
+// lists, method lists) so we need to parse it differently.
+//
+// If there is no relocation at `secOffset` the category has no protocol list
+// and `ptrList` is left untouched. Otherwise every protocol symbol in the list
+// is appended to ptrList.allPtrs and ptrList.structCount grows by the number
+// of protocols read.
+void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
+                                               uint32_t secOffset,
+                                               PointerListInfo &ptrList) {
+  // The condition must live inside the assert: asserting a bare string
+  // literal is always true and would never fire.
+  assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
+         "Tried to read pointer list beyond protocol section end");
+
+  const Reloc *reloc = isec->getRelocAt(secOffset);
+  if (!reloc)
+    return;
+
+  auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+  assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
+
+  // Theoretically protocol count can be either 32b or 64b, depending on
+  // platform pointer size, but to simplify implementation we always just read
+  // the lower 32b which should be good enough. The offset comes from the
+  // protocol list header layout, the same one the emitter writes through.
+  uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
+      ptrListSym->isec->data.data() +
+      protocolListHeaderLayout.protocolCountOffset);
+
+  ptrList.structCount += protocolCount;
+  ptrList.structSize = target->wordSize;
+
+  uint32_t expectedListSize =
+      (protocolCount * target->wordSize) +
+      /*header(count)*/ protocolListHeaderLayout.totalSize +
+      /*extra null value*/ target->wordSize;
+  // Only read by assertions below; silence unused warnings in NDEBUG builds.
+  (void)expectedListSize;
+  assert(expectedListSize == ptrListSym->isec->data.size() &&
+         "Protocol list does not match expected size");
+
+  uint32_t off = protocolListHeaderLayout.totalSize;
+  for (uint32_t inx = 0; inx < protocolCount; ++inx) {
+    const Reloc *protoReloc = ptrListSym->isec->getRelocAt(off);
+    assert(protoReloc && "No reloc found at protocol list offset");
+
+    auto *listSym =
+        dyn_cast_or_null<Defined>(protoReloc->referent.get<Symbol *>());
+    assert(listSym && "Protocol list reloc does not have a valid Defined");
+
+    ptrList.allPtrs.push_back(listSym);
+    off += target->wordSize;
+  }
+  assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
+         "expected null terminating protocol");
+  assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
+         "Protocol list end offset does not match expected size");
+}
+
+// Reads a method or property list referenced from `isec` at `secOffset` and
+// accumulates it into `ptrList`. Instance methods, class methods, instance
+// props and class props share this format. When there is no relocation at
+// `secOffset`, `ptrList` is left unchanged.
+void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
+                                              uint32_t secOffset,
+                                              PointerListInfo &ptrList) {
+  assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
+  assert(isec && "Trying to parse pointer list from null isec");
+  assert(secOffset + target->wordSize <= isec->data.size() &&
+         "Trying to read pointer list beyond section end");
+
+  const Reloc *listReloc = isec->getRelocAt(secOffset);
+  if (listReloc == nullptr)
+    return;
+
+  auto *listDef =
+      dyn_cast_or_null<Defined>(listReloc->referent.get<Symbol *>());
+  assert(listDef && "Reloc does not have a valid Defined");
+
+  // The list header holds the per-entry size and the entry count.
+  const uint8_t *header = listDef->isec->data.data();
+  uint32_t entrySize = *reinterpret_cast<const uint32_t *>(
+      header + listHeaderLayout.structSizeOffset);
+  uint32_t entryCount = *reinterpret_cast<const uint32_t *>(
+      header + listHeaderLayout.structCountOffset);
+  assert(entrySize == ptrList.pointersPerStruct * target->wordSize);
+
+  // Every list merged into the same PointerListInfo must agree on entry size.
+  assert(!ptrList.structSize || (entrySize == ptrList.structSize));
+
+  ptrList.structSize = entrySize;
+  ptrList.structCount += entryCount;
+
+  uint32_t listEnd = listHeaderLayout.totalSize + (entrySize * entryCount);
+  assert(listEnd == listDef->isec->data.size() &&
+         "Pointer list does not match expected size");
+
+  // After the header, each word is expected to carry a relocation to a
+  // Defined; collect them all in order.
+  uint32_t off = listHeaderLayout.totalSize;
+  while (off < listEnd) {
+    const Reloc *entryReloc = listDef->isec->getRelocAt(off);
+    assert(entryReloc && "No reloc found at pointer list offset");
+
+    auto *entrySym =
+        dyn_cast_or_null<Defined>(entryReloc->referent.get<Symbol *>());
+    assert(entrySym && "Reloc does not have a valid Defined");
+
+    ptrList.allPtrs.push_back(entrySym);
+    off += target->wordSize;
+  }
+}
+
+// Here we parse all the information of an input category (catInfo) and
+// append the parsed info into the structure which will contain all the
+// information about how a class is extended (extInfo).
+//
+// The first category parsed into `extInfo` decides the object file that
+// generated data is attributed to, and the base class. Later categories
+// contribute their name (joined with '|') and their lists, and must refer to
+// the same base class.
+void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
+                                               ClassExtensionInfo &extInfo) {
+  const Reloc *catNameReloc =
+      catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
+
+  // Parse name
+  assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
+
+  // is this the first category we are parsing?
+  if (extInfo.mergedContainerName.empty())
+    extInfo.objFileForMergeData =
+        dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
+  else
+    extInfo.mergedContainerName += "|";
+
+  assert(extInfo.objFileForMergeData &&
+         "Expected to already have valid objextInfo.objFileForMergeData");
+
+  StringRef catName = getReferentString(*catNameReloc);
+  extInfo.mergedContainerName += catName.str();
+
+  // Parse base class
+  if (!extInfo.baseClass) {
+    Symbol *classSym =
+        tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
+    // The lookup above returns nullptr when no symbol is found; catch that
+    // here instead of dereferencing a null pointer below.
+    assert(classSym && "Category does not have a valid base class symbol");
+    assert(extInfo.baseClassName.empty());
+    extInfo.baseClass = classSym;
+    llvm::StringRef classPrefix(objc::symbol_names::klass);
+    assert(classSym->getName().starts_with(classPrefix) &&
+           "Base class symbol does not start with expected prefix");
+    // The class name is the symbol name with the class prefix stripped.
+    extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
+  } else {
+    assert((extInfo.baseClass ==
+            tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
+                                     catLayout.klassOffset)) &&
+           "Trying to parse category info into container with different base "
+           "class");
+  }
+
+  parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
+                       extInfo.instanceMethods);
+
+  parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
+                       extInfo.classMethods);
+
+  parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
+                        extInfo.protocols);
+
+  parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
+                       extInfo.instanceProps);
+
+  parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
+                       extInfo.classProps);
+}
+
+// Generate a protocol list (including header) and link it into the parent at
+// the specified offset. Nothing is emitted when `ptrList` holds no pointers.
+//
+// The generated section has room for the header, one word per protocol and one
+// extra trailing word for the null terminator. Only the count and the
+// relocations are written here.
+// NOTE(review): the terminator relies on newSectionData() returning zeroed
+// bytes - confirm against its definition.
+void ObjcCategoryMerger::emitAndLinkProtocolList(
+    Defined *parentSym, uint32_t linkAtOffset,
+    const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
+  if (ptrList.allPtrs.empty())
+    return;
+
+  assert(ptrList.allPtrs.size() == ptrList.structCount);
+
+  uint32_t bodySize = (ptrList.structCount * target->wordSize) +
+                      /*header(count)*/ protocolListHeaderLayout.totalSize +
+                      /*extra null value*/ target->wordSize;
+  // Write through the mutable buffer first, and only then take a read-only
+  // view of it; this avoids casting away constness on the view.
+  SmallVector<uint8_t> &bodyBytes = newSectionData(bodySize);
+
+  // This theoretically can be either 32b or 64b, but writing just the first 32b
+  // is good enough
+  uint32_t *ptrProtoCount = reinterpret_cast<uint32_t *>(
+      bodyBytes.data() + protocolListHeaderLayout.protocolCountOffset);
+  *ptrProtoCount = static_cast<uint32_t>(ptrList.allPtrs.size());
+
+  llvm::ArrayRef<uint8_t> bodyData = bodyBytes;
+
+  ConcatInputSection *listSec = make<ConcatInputSection>(
+      *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
+      infoCategoryWriter.catPtrListInfo.align);
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+  listSec->live = true;
+  allInputSections.push_back(listSec);
+
+  std::string symName = ptrList.categoryPrefix;
+  symName += extInfo.baseClassName + "_$_(" + extInfo.mergedContainerName + ")";
+
+  Defined *ptrListSym = make<Defined>(
+      newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
+      listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
+      /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  ptrListSym->used = true;
+  parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+
+  // Point the parent's slot at the new list.
+  createSymbolReference(parentSym, ptrListSym, linkAtOffset,
+                        infoCategoryWriter.catBodyInfo.relocTemplate);
+
+  // One relocation per protocol, laid out right after the header.
+  uint32_t offset = protocolListHeaderLayout.totalSize;
+  for (Symbol *symbol : ptrList.allPtrs) {
+    createSymbolReference(ptrListSym, symbol, offset,
+                          infoCategoryWriter.catPtrListInfo.relocTemplate);
+    offset += target->wordSize;
+  }
+}
+
+// Generate a pointer list (including header) and link it into the parent at the
+// specified offset. This is used for instance and class methods and
+// properties. Nothing is emitted when `ptrList` holds no pointers.
+void ObjcCategoryMerger::emitAndLinkPointerList(
+    Defined *parentSym, uint32_t linkAtOffset,
+    const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
+  if (ptrList.allPtrs.empty())
+    return;
+
+  assert(ptrList.allPtrs.size() * target->wordSize ==
+         ptrList.structCount * ptrList.structSize);
+
+  // Generate body
+  uint32_t bodySize =
+      listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
+  // Write through the mutable buffer first, and only then take a read-only
+  // view of it; this avoids casting away constness on the view.
+  SmallVector<uint8_t> &bodyBytes = newSectionData(bodySize);
+
+  uint32_t *ptrStructSize = reinterpret_cast<uint32_t *>(
+      bodyBytes.data() + listHeaderLayout.structSizeOffset);
+  uint32_t *ptrStructCount = reinterpret_cast<uint32_t *>(
+      bodyBytes.data() + listHeaderLayout.structCountOffset);
+
+  *ptrStructSize = ptrList.structSize;
+  *ptrStructCount = ptrList.structCount;
+
+  llvm::ArrayRef<uint8_t> bodyData = bodyBytes;
+
+  ConcatInputSection *listSec = make<ConcatInputSection>(
+      *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
+      infoCategoryWriter.catPtrListInfo.align);
+  listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
+  listSec->live = true;
+  allInputSections.push_back(listSec);
+
+  std::string symName = ptrList.categoryPrefix;
+  symName += extInfo.baseClassName + "_$_" + extInfo.mergedContainerName;
+
+  Defined *ptrListSym = make<Defined>(
+      newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
+      listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
+      /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  ptrListSym->used = true;
+  parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+
+  // Point the parent's slot at the new list.
+  createSymbolReference(parentSym, ptrListSym, linkAtOffset,
+                        infoCategoryWriter.catBodyInfo.relocTemplate);
+
+  // One relocation per collected pointer, one word apart, after the header.
+  uint32_t offset = listHeaderLayout.totalSize;
+  for (Symbol *symbol : ptrList.allPtrs) {
+    createSymbolReference(ptrListSym, symbol, offset,
+                          infoCategoryWriter.catPtrListInfo.relocTemplate);
+    offset += target->wordSize;
+  }
+}
+
+// This method creates an __objc_catlist ConcatInputSection with a single slot
+// (one target word) and returns a symbol covering it. The slot itself is not
+// linked to a category here; the caller adds that relocation. The symbol name
+// is descriptive only and the symbol is kept out of the symbol table.
+Defined *
+ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
+                                        const std::string &forBaseClassName,
+                                        ObjFile *objFile) {
+  uint32_t sectionSize = target->wordSize;
+  llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
+
+  ConcatInputSection *newCatList =
+      make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
+                               bodyData, infoCategoryWriter.catListInfo.align);
+  newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
+  newCatList->live = true;
+  allInputSections.push_back(newCatList);
+
+  std::string catSymName = "<__objc_catlist slot for merged category ";
+  catSymName += forBaseClassName + "(" + forCateogryName + ")>";
+
+  Defined *catListSym = make<Defined>(
+      newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
+      /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
+      /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  catListSym->used = true;
+  objFile->symbols.push_back(catListSym);
+  return catListSym;
+}
+
+// Here we generate the main category body and just the body and link the name
+// and base class into it. We don't link any other info like the protocol and
+// class/instance methods/props.
+//
+// Returns the symbol covering the new body, named from the category prefix,
+// the base class name and the merged category name.
+Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
+                                              const Defined *nameSym,
+                                              const Symbol *baseClassSym,
+                                              const std::string &baseClassName,
+                                              ObjFile *objFile) {
+  // Write through the mutable buffer first, and only then take a read-only
+  // view of it; this avoids casting away constness on the view.
+  SmallVector<uint8_t> &bodyBytes = newSectionData(catLayout.totalSize);
+
+  // The body records its own total size at sizeOffset.
+  uint32_t *ptrSize =
+      reinterpret_cast<uint32_t *>(bodyBytes.data() + catLayout.sizeOffset);
+  *ptrSize = catLayout.totalSize;
+
+  llvm::ArrayRef<uint8_t> bodyData = bodyBytes;
+
+  ConcatInputSection *newBodySec =
+      make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
+                               bodyData, infoCategoryWriter.catBodyInfo.align);
+  newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
+  newBodySec->live = true;
+  allInputSections.push_back(newBodySec);
+
+  std::string symName =
+      objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
+  Defined *catBodySym = make<Defined>(
+      newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
+      /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
+      /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
+      /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
+      /*isWeakDefCanBeHidden=*/false);
+
+  catBodySym->used = true;
+  objFile->symbols.push_back(catBodySym);
+
+  createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
+                        infoCategoryWriter.catBodyInfo.relocTemplate);
+
+  // Create a reloc to the base class (either external or internal)
+  createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
+                        infoCategoryWriter.catBodyInfo.relocTemplate);
+
+  return catBodySym;
+}
+
+// This writes the new category name (for the merged category) into the binary
+// and returns the symbol for it. The name is stored, null terminator included,
+// in a new CStringInputSection that is attached to the Section and output
+// section recorded in infoCategoryWriter.catNameInfo.
+Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
+                                              ObjFile *objFile) {
+  StringRef nameStrData = newStringData(name.c_str());
+  // We use +1 below to include the null terminator
+  llvm::ArrayRef<uint8_t> nameData(
+      reinterpret_cast<const uint8_t *>(nameStrData.data()),
+      nameStrData.size() + 1);
+
+  auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
+  CStringInputSection *newStringSec = make<CStringInputSection>(
+      *parentSection, nameData, infoCategoryWriter.catNameInfo.align,
+      /*dedupLiterals=*/true);
+
+  parentSection->subsections.push_back({0, newStringSec});
+
+  newStringSec->splitIntoPieces();
+  // Check the piece count before indexing into the pieces.
+  assert(newStringSec->pieces.size() == 1 &&
+         "Merged category name must form exactly one string piece");
+  newStringSec->pieces[0].live = true;
+  newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
+  in.cStringSection->addInput(newStringSec);
+
+  Defined *catNameSym = make<Defined>(
+      "<merged category name>", /*file=*/objFile, newStringSec,
+      /*value=*/0, nameData.size(),
+      /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+      /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
+      /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
+
+  catNameSym->used = true;
+  objFile->symbols.push_back(catNameSym);
+  return catNameSym;
+}
+
+// Builds a complete merged category out of `extInfo`: the name string, the
+// category body, a one-slot __objc_catlist entry pointing at that body, and
+// then each non-empty method / protocol / property list linked into the body.
+// Returns the symbol of the category body.
+Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
+  const std::string &mergedName = extInfo.mergedContainerName;
+  ObjFile *ownerFile = extInfo.objFileForMergeData;
+
+  Defined *catNameSym = emitCategoryName(mergedName, ownerFile);
+
+  Defined *catBodySym =
+      emitCategoryBody(mergedName, catNameSym, extInfo.baseClass,
+                       extInfo.baseClassName, ownerFile);
+
+  Defined *catListSym =
+      emitCatListEntrySec(mergedName, extInfo.baseClassName, ownerFile);
+
+  // Add the single category body to the category list at the offset 0.
+  createSymbolReference(catListSym, catBodySym, /*offset=*/0,
+                        infoCategoryWriter.catListInfo.relocTemplate);
+
+  // Shorthand for linking one method/property list into the new body.
+  auto linkPointerList = [&](uint32_t slotOffset, const PointerListInfo &list) {
+    emitAndLinkPointerList(catBodySym, slotOffset, extInfo, list);
+  };
+
+  // The lists are emitted in the same order as their slots are named here.
+  linkPointerList(catLayout.instanceMethodsOffset, extInfo.instanceMethods);
+  linkPointerList(catLayout.classMethodsOffset, extInfo.classMethods);
+  emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
+                          extInfo.protocols);
+  linkPointerList(catLayout.instancePropsOffset, extInfo.instanceProps);
+  linkPointerList(catLayout.classPropsOffset, extInfo.classProps);
+
+  return catBodySym;
+}
+
+// This method merges all the categories (sharing a base class) into a single
+// category. Every input category is parsed into one ClassExtensionInfo, a new
+// category is emitted from it, and each input is then flagged as merged.
+void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
+    std::vector<InfoInputCategory> &categories) {
+  assert(categories.size() > 1 && "Expected at least 2 categories");
+
+  ClassExtensionInfo extInfo(catLayout);
+
+  for (auto &catInfo : categories)
+    parseCatInfoToExtInfo(catInfo, extInfo);
+
+  Defined *newCatDef = emitCategory(extInfo);
+  assert(newCatDef && "Failed to create a new category");
+  // Only read by the assert above; silence unused warnings in NDEBUG builds.
+  (void)newCatDef;
+
+  for (auto &catInfo : categories)
+    catInfo.wasMerged = true;
+}
+
+// Appends to the section of `refFrom` a copy of `relocTemplate` retargeted at
+// `refTo`, placed at `offset` and with a zero addend.
+void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
+                                               const Symbol *refTo,
+                                               uint32_t offset,
+                                               const Reloc &relocTemplate) {
+  Reloc newReloc(relocTemplate);
+  newReloc.referent = const_cast<Symbol *>(refTo);
+  newReloc.addend = 0;
+  newReloc.offset = offset;
+  refFrom->isec->relocs.push_back(newReloc);
+}
+
+void ObjcCategoryMerger::collectAndValidateCategoriesData() {
+ for (InputSection *sec : allInputSections) {
+ if (sec->getName() != section_names::objcCatList)
+ continue;
+ ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
+ assert(catListCisec &&
+ "__objc_catList InputSection is not a ConcatInputSection");
+
+ for (uint32_t off = 0; off < catListCisec->getSize();
+ off += target->wordSize) {
+ Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
+ assert(categorySym &&
+ "Failed to get a valid cateogry at __objc_catlit offset");
+ if (!categorySym->getName().starts_with(objc::symbol_names::category))
+ continue; // Only support ObjC categories (no swift + @objc)
----------------
kyulee-com wrote:
Can you move the comment onto its own line, above the statement?
https://github.com/llvm/llvm-project/pull/82928
More information about the llvm-commits
mailing list