[lld] [lld-macho] Implement ObjC category merging (-merge-objc-categories) (PR #82928)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 25 09:11:04 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: None (alx32)
<details>
<summary>Changes</summary>
This change adds a flag to lld to enable category merging for MachO + ObjC. Behavior: if multiple categories in the same link unit extend the same class, they get merged into a single category.
Ex: `Cat1(method1+method2,protocol1) + Cat2(method3+method4,protocol2, property1) = Cat1_2(method1+method2+method3+method4, protocol1+protocol2, property1)`
Notes on implementation decisions made in this diff:
1. The current implementation could be improved further by merging the category data directly into the base class (if the base class is present in the link unit) - this improvement may be done as a follow-up.
2. We do the merging as early as possible, on the raw inputSections.
3. We add a new flag for ObjFile (isLinkerGenerated) and create such an ObjFile to which all new linker-generated data belongs.
4. We add a new flag (linkerOptimizeReason) to ConcatInputSection and StringPiece to mark that this data has been optimized away. Another way to do it would have been to just mark the pieces as not 'live', but this would require some work-arounds in the actual live symbol determination logic and would also cause symbols to incorrectly show up as 'dead-stripped' when that's not the reason they are not present.
---
Patch is 83.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82928.diff
11 Files Affected:
- (modified) lld/MachO/Driver.cpp (+9-4)
- (modified) lld/MachO/InputFiles.cpp (+18-12)
- (modified) lld/MachO/InputFiles.h (+2-1)
- (modified) lld/MachO/InputSection.h (+28-4)
- (modified) lld/MachO/MapFile.cpp (+5-1)
- (modified) lld/MachO/MarkLive.cpp (+2)
- (modified) lld/MachO/ObjC.cpp (+1036-6)
- (modified) lld/MachO/ObjC.h (+1-1)
- (modified) lld/MachO/Options.td (+3)
- (modified) lld/MachO/SyntheticSections.cpp (+4-4)
- (added) lld/test/MachO/objc-category-merging.s (+695)
``````````diff
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index a57f60c5eed36b..44b1df7592ce00 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1966,15 +1966,20 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
}
gatherInputSections();
+
+ // Run category checking & merging before anything else, it operates
+ // directly on inputSections.
+ if (args.hasArg(OPT_check_category_conflicts))
+ objc::checkCategories();
+
+ if (args.hasArg(OPT_merge_objc_categories))
+ objc::mergeCategories();
+
if (config->callGraphProfileSort)
priorityBuilder.extractCallGraphProfile();
if (config->deadStrip)
markLive();
-
- if (args.hasArg(OPT_check_category_conflicts))
- objc::checkCategories();
-
// ICF assumes that all literals have been folded already, so we must run
// foldIdenticalLiterals before foldIdenticalSections.
foldIdenticalLiterals();
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 158c3fbf7b0fca..2329dd57b6bf70 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -965,21 +965,23 @@ void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy, bool forceHidden, bool compatArch,
- bool builtFromBitcode)
+ bool builtFromBitcode, bool isLinkerGenerated)
: InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
- builtFromBitcode(builtFromBitcode) {
+ builtFromBitcode(builtFromBitcode), isLinkerGenerated(isLinkerGenerated) {
this->archiveName = std::string(archiveName);
this->compatArch = compatArch;
- if (lazy) {
- if (target->wordSize == 8)
- parseLazy<LP64>();
- else
- parseLazy<ILP32>();
- } else {
- if (target->wordSize == 8)
- parse<LP64>();
- else
- parse<ILP32>();
+ if (!isLinkerGenerated) {
+ if (lazy) {
+ if (target->wordSize == 8)
+ parseLazy<LP64>();
+ else
+ parseLazy<ILP32>();
+ } else {
+ if (target->wordSize == 8)
+ parse<LP64>();
+ else
+ parse<ILP32>();
+ }
}
}
@@ -1103,6 +1105,8 @@ void ObjFile::parseDebugInfo() {
}
ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
+ if (!mb.getBufferSize())
+ return {};
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
if (!cmd)
@@ -1113,6 +1117,8 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
}
ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
+ if (!mb.getBufferSize())
+ return {};
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
if (auto *cmd =
findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 5e550c167c232e..be3fd6119d4786 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -161,7 +161,7 @@ class ObjFile final : public InputFile {
public:
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy = false, bool forceHidden = false, bool compatArch = true,
- bool builtFromBitcode = false);
+ bool builtFromBitcode = false, bool isLinkerGenerated = false);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
@@ -181,6 +181,7 @@ class ObjFile final : public InputFile {
const uint32_t modTime;
bool forceHidden;
bool builtFromBitcode;
+ bool isLinkerGenerated;
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index becb01017d633a..2cd514f8c55df5 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -24,6 +24,11 @@
namespace lld {
namespace macho {
+enum LinkerOptReason : uint8_t {
+ NotOptimized,
+ CategoryMerging,
+};
+
class InputFile;
class OutputSection;
@@ -60,6 +65,7 @@ class InputSection {
// Whether the data at \p off in this InputSection is live.
virtual bool isLive(uint64_t off) const = 0;
virtual void markLive(uint64_t off) = 0;
+ virtual bool isLinkOptimizedAway() const { return false; }
virtual InputSection *canonical() { return this; }
virtual const InputSection *canonical() const { return this; }
@@ -93,9 +99,9 @@ class InputSection {
// .subsections_via_symbols, there is typically only one element here.
llvm::TinyPtrVector<Defined *> symbols;
-protected:
const Section &section;
+protected:
const Defined *getContainingSymbol(uint64_t off) const;
};
@@ -114,7 +120,12 @@ class ConcatInputSection final : public InputSection {
bool isLive(uint64_t off) const override { return live; }
void markLive(uint64_t off) override { live = true; }
bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
- bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
+ bool isLinkOptimizedAway() const override {
+ return linkerOptimizeReason != LinkerOptReason::NotOptimized;
+ }
+ bool shouldOmitFromOutput() const {
+ return isLinkOptimizedAway() || !live || isCoalescedWeak();
+ }
void writeTo(uint8_t *buf);
void foldIdentical(ConcatInputSection *redundant);
@@ -141,6 +152,11 @@ class ConcatInputSection final : public InputSection {
// first and not copied to the output.
bool wasCoalesced = false;
bool live = !config->deadStrip;
+ // Flag to specify if a linker optimzation flagged this section to be
+ // discarded. Need a separate flag from live as live specifically means
+ // 'dead-stripped' which is rellevant in contexts such as linker map
+ // generation
+ LinkerOptReason linkerOptimizeReason = LinkerOptReason::NotOptimized;
bool hasCallSites = false;
// This variable has two usages. Initially, it represents the input order.
// After assignAddresses is called, it represents the offset from the
@@ -176,10 +192,18 @@ struct StringPiece {
// Only set if deduplicating literals
uint32_t hash : 31;
// Offset from the start of the containing output section.
- uint64_t outSecOff = 0;
+ uint64_t outSecOff : 56;
+ LinkerOptReason linkerOptimizeReason : 8;
+
+ bool shouldOmitFromOutput() const {
+ return !live || linkerOptimizeReason != LinkerOptReason::NotOptimized;
+ }
StringPiece(uint64_t off, uint32_t hash)
- : inSecOff(off), live(!config->deadStrip), hash(hash) {}
+ : inSecOff(off), live(!config->deadStrip), hash(hash) {
+ outSecOff = 0;
+ linkerOptimizeReason = LinkerOptReason::NotOptimized;
+ }
};
static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f736360624ebd1..455ebffbbd567a 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -80,7 +80,7 @@ static MapInfo gatherMapInfo() {
if (d->isec && d->getFile() == file &&
!isa<CStringInputSection>(d->isec)) {
isReferencedFile = true;
- if (!d->isLive())
+ if (!d->isLive() && (!d->isec || !d->isec->isLinkOptimizedAway()))
info.deadSymbols.push_back(d);
}
}
@@ -93,6 +93,8 @@ static MapInfo gatherMapInfo() {
if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
auto &liveCStrings = info.liveCStringsForSection[isec->parent];
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+ if (piece.linkerOptimizeReason != LinkerOptReason::NotOptimized)
+ continue;
if (piece.live)
liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
{fileIndex, isec->getStringRef(i)}});
@@ -203,6 +205,8 @@ void macho::writeMapFile() {
for (const OutputSection *osec : seg->getSections()) {
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
for (const InputSection *isec : concatOsec->inputs) {
+ if (isec->isLinkOptimizedAway())
+ continue;
for (Defined *sym : isec->symbols)
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index a37213d5613afb..5eb189574dec1e 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -259,6 +259,8 @@ void markLive() {
dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
marker->addSym(stubBinder);
for (ConcatInputSection *isec : inputSections) {
+ if (isec->isLinkOptimizedAway())
+ continue;
// Sections marked no_dead_strip
if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) {
marker->enqueue(isec, 0);
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 67254ec53a2145..04827933753350 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -7,16 +7,19 @@
//===----------------------------------------------------------------------===//
#include "ObjC.h"
+#include "ConcatOutputSection.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "Layout.h"
#include "OutputSegment.h"
+#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
using namespace llvm::MachO;
@@ -78,7 +81,9 @@ namespace {
DO(Ptr, classMethods) \
DO(Ptr, protocols) \
DO(Ptr, instanceProps) \
- DO(Ptr, classProps)
+ DO(Ptr, classProps) \
+ DO(uint32_t, size) \
+ DO(uint32_t, padding)
CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
@@ -112,13 +117,19 @@ CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
#undef FOR_EACH_RO_CLASS_FIELD
#define FOR_EACH_LIST_HEADER(DO) \
- DO(uint32_t, size) \
- DO(uint32_t, count)
+ DO(uint32_t, structSize) \
+ DO(uint32_t, structCount)
CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
#undef FOR_EACH_LIST_HEADER
+#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
+
+CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
+
+#undef FOR_EACH_PROTOCOL_LIST_HEADER
+
#define FOR_EACH_METHOD(DO) \
DO(Ptr, name) \
DO(Ptr, type) \
@@ -176,13 +187,22 @@ ObjcCategoryChecker::ObjcCategoryChecker()
roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
methodLayout(target->wordSize) {}
-// \p r must point to an offset within a cstring section.
+// \p r must point to an offset within a cstring section or ConcatInputSection
static StringRef getReferentString(const Reloc &r) {
if (auto *isec = r.referent.dyn_cast<InputSection *>())
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
- return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
- r.addend);
+ uint32_t dataOff = sym->value + r.addend;
+ if (auto *cisec = dyn_cast<ConcatInputSection>(sym->isec)) {
+ uint32_t buffSize = cisec->data.size();
+ const char *pszBuff = reinterpret_cast<const char *>(cisec->data.data());
+ assert(dataOff < buffSize);
+ uint32_t sLen = strnlen(pszBuff + dataOff, buffSize - dataOff);
+ llvm::StringRef strRef(pszBuff + dataOff, sLen);
+ assert(strRef.size() > 0 && "getReferentString returning empty string");
+ return strRef;
+ }
+ return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(dataOff);
}
void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -311,6 +331,8 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
}
void objc::checkCategories() {
+ TimeTraceScope timeScope("ObjcCategoryChecker");
+
ObjcCategoryChecker checker;
for (const InputSection *isec : inputSections) {
if (isec->getName() == section_names::objcCatList)
@@ -320,3 +342,1011 @@ void objc::checkCategories() {
}
}
}
+
+namespace {
+
+class ObjcCategoryMerger {
+ // Information about an input category
+ struct InfoInputCategory {
+ ConcatInputSection *catBodyIsec;
+ ConcatInputSection *catListIsec;
+ uint32_t offCatListIsec = 0;
+
+ bool wasMerged = false;
+ };
+
+ // To write new (merged) categories or classes, we will try make limited
+ // assumptions about the alignment and the sections the various class/category
+ // info are stored in and . So we'll just reuse the same sections and
+ // alignment as already used in existing (input) categories. To do this we
+ // have InfoCategoryWriter which contains the various sections that the
+ // generated categories will be written to.
+ template <typename T> struct InfroWriteSection {
+ bool valid = false; // Data has been successfully collected from input
+ uint32_t align = 0;
+ const Section *inputSection;
+ Reloc relocTemplate;
+ T *outputSection;
+ };
+
+ struct InfoCategoryWriter {
+ InfroWriteSection<ConcatOutputSection> catListInfo;
+ InfroWriteSection<CStringSection> catNameInfo;
+ InfroWriteSection<ConcatOutputSection> catBodyInfo;
+ InfroWriteSection<ConcatOutputSection> catPtrListInfo;
+
+ // Linker-generated ObjFile for all the binary data that we will be
+ // generating (category body, method lists, strings, etc ...)
+ ObjFile *generatedDataObjFile = nullptr;
+ };
+
+ // Information about a pointer list in the original categories (method lists,
+ // protocol lists, etc)
+ struct PointerListInfo {
+ PointerListInfo(const char *pszSymNamePrefix)
+ : namePrefix(pszSymNamePrefix) {}
+ const char *namePrefix;
+
+ uint32_t structSize = 0;
+ uint32_t structCount = 0;
+
+ std::vector<Symbol *> allPtrs;
+ };
+
+ // Full information about all the categories that are extending a class. This
+ // will have all the additional methods, protocols, proprieties that are
+ // contained in all the categories that extend a particular class.
+ struct ClassExtensionInfo {
+ // Merged names of containers. Ex: base|firstCategory|secondCategory|...
+ std::string mergedContainerName;
+ std::string baseClassName;
+ Symbol *baseClass = nullptr;
+
+ PointerListInfo instanceMethods = "__OBJC_$_CATEGORY_INSTANCE_METHODS_";
+ PointerListInfo classMethods = "__OBJC_$_CATEGORY_CLASS_METHODS_";
+ PointerListInfo protocols = "__OBJC_CATEGORY_PROTOCOLS_$_";
+ PointerListInfo instanceProps = "__OBJC_$_PROP_LIST_";
+ PointerListInfo classProps = "__OBJC_$_CLASS_PROP_LIST_";
+ };
+
+public:
+ ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
+ bool doMerge();
+
+private:
+ // This returns bool and always false for easy 'return false;' statements
+ bool registerError(const char *msg);
+
+ bool collectAndValidateCategoriesData();
+ bool
+ mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+ bool eraseMergedCategories();
+
+ ObjFile *getGenObjFile();
+
+ bool generateCatListForNonErasedCategories(
+ std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets,
+ uint32_t remainingCategories);
+ template <typename T>
+ bool collectSectionWriteInfoFromIsec(InputSection *isec,
+ InfroWriteSection<T> &catWriteInfo);
+ bool collectCategoryWriterInfoFromCategory(InfoInputCategory &catInfo);
+ bool parseCatInfoToExtInfo(InfoInputCategory &catInfo,
+ ClassExtensionInfo &extInfo);
+
+ bool tryParseProtocolListInfo(ConcatInputSection *isec,
+ uint32_t symbolsPerStruct,
+ PointerListInfo &ptrList);
+
+ bool parsePointerListInfo(ConcatInputSection *isec, uint32_t secOffset,
+ uint32_t symbolsPerStruct,
+ PointerListInfo &ptrList);
+
+ bool emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
+ ClassExtensionInfo &extInfo,
+ PointerListInfo &ptrList);
+
+ bool emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
+ ClassExtensionInfo &extInfo,
+ PointerListInfo &ptrList);
+
+ bool emitCategory(ClassExtensionInfo &extInfo, Defined *&catBodySym);
+ bool emitCatListEntrySec(std::string &forCateogryName,
+ std::string &forBaseClassName, Defined *&catListSym);
+ bool emitCategoryBody(std::string &name, Defined *nameSym,
+ Symbol *baseClassSym, Defined *&catBodySym);
+ bool emitCategoryName(std::string &name, Defined *&catNameSym);
+ bool createSymbolReference(Defined *refFrom, Symbol *refTo, uint32_t offset,
+ Reloc &relocTemplate);
+ bool tryGetSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+ Symbol *&sym);
+ bool tryGetDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+ Defined *&defined);
+ bool tryEraseDefinedAtIsecOffset(ConcatInputSection *isec, uint32_t offset,
+ bool stringOnly = false);
+
+ CategoryLayout catLayout;
+ ClassLayout classLayout;
+ ROClassLayout roClassLayout;
+ ListHeaderLayout listHeaderLayout;
+ MethodLayout methodLayout;
+ ProtocolListHeaderLayout protocolListHeaderLayout;
+
+ InfoCategoryWriter infoCategoryWriter;
+ std::vector<ConcatInputSection *> &allInputSections;
+ // Map of base class Symbol to list of InfoInputCategory's for it
+ std::map<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+
+ // Normally, the binary data comes from the input files, but since we're
+ // generating binary data ourselves, we use the below arrays to store it in.
+ // Need this to be 'static' so the data survives past the ObjcCategoryMerger
+ // object, as the data will be read by the Writer when the final binary is
+ // generated.
+ static SmallVector<SmallString<0>> generatedNames;
+ static SmallVector<SmallVector<uint8_t>> generatedSectionData;
+};
+
+SmallVector<SmallString<0>> ObjcCategoryMerger::generatedNames;
+SmallVector<SmallVector<uint8_t>> ObjcCategoryMerger::generatedSectionData;
+
+ObjcCategoryMerger::ObjcCategoryMerger(
+ std::vector<ConcatInputSection *> &_allInputSections)
+ : catLayout(target->wordSize), classLayout(target->wordSize),
+ roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
+ methodLayout(target->wordSize),
+ protocolListHeaderLayout(target->wordSize),
+ allInputSections(_allInputSections) {}
+
+bool ObjcCategoryMerger::registerError(const char *msg) {
+ std::string err = "ObjC category merging error[-merge-objc-categories]: ";
+ err += msg;
+ error(err);
+ return false; // Always return false for easy 'return registerError()' syntax.
+}
+
+ObjFile *ObjcCategoryMerger::getGenObjFile() {
+ // Only create the linker-generated ObjFile on-demand - so if it's not needed
+ // (i.e. no categories are to be merged) then we don't need to create it.
+ if (infoCategoryWriter.generatedDataObjFile)
+ return infoCategoryWriter.generatedDataObjFile;
+
+ SmallString<32> objBuf;
+
+ infoCategoryWriter.generatedDataObjFile = make<ObjFile>(
+ MemoryBufferRef(objBuf, "<-merge-objc-ca...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/82928
More information about the llvm-commits
mailing list