[lld] [LLD][MachO] Option to emit separate cstring sections (PR #158720)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 11:45:04 PDT 2025
https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/158720
>From c9ee8b2867fa961932bdddc2425e6ed96564a175 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Mon, 15 Sep 2025 12:10:35 -0700
Subject: [PATCH 1/2] [LLD][MachO] Option to emit separate cstring sections
---
lld/MachO/Config.h | 1 +
lld/MachO/Driver.cpp | 6 +++++-
lld/MachO/InputSection.cpp | 9 ++++++---
lld/MachO/MapFile.cpp | 4 +++-
lld/MachO/ObjC.cpp | 2 +-
lld/MachO/Options.td | 7 +++++++
lld/MachO/SyntheticSections.h | 17 ++++++++++++++++-
lld/MachO/Writer.cpp | 7 ++-----
lld/test/MachO/cstring.ll | 32 ++++++++++++++++++++++++++++++++
9 files changed, 73 insertions(+), 12 deletions(-)
create mode 100644 lld/test/MachO/cstring.ll
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 19dba790c1c7c..51b1363d87615 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -222,6 +222,7 @@ struct Configuration {
bool pgoWarnMismatch;
bool warnThinArchiveMissingMembers;
bool disableVerify;
+ bool separateCstringLiteralSections;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3db638e1ead96..f54b6bbdc155c 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1520,7 +1520,8 @@ static void foldIdenticalLiterals() {
// We always create a cStringSection, regardless of whether dedupLiterals is
// true. If it isn't, we simply create a non-deduplicating CStringSection.
// Either way, we must unconditionally finalize it here.
- in.cStringSection->finalizeContents();
+ for (auto &[name, sec] : in.cStringSectionMap)
+ sec->finalizeContents();
in.objcMethnameSection->finalizeContents();
in.wordLiteralSection->finalizeContents();
}
@@ -1981,6 +1982,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_warn_thin_archive_missing_members, true);
config->generateUuid = !args.hasArg(OPT_no_uuid);
config->disableVerify = args.hasArg(OPT_disable_verify);
+ config->separateCstringLiteralSections =
+ args.hasFlag(OPT_separate_cstring_literal_sections,
+ OPT_no_separate_cstring_literal_sections, false);
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 18b3ff961085b..b7718db45aef6 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -68,9 +68,12 @@ void lld::macho::addInputSection(InputSection *inputSection) {
in.objcMethnameSection->inputOrder = inputSectionsOrder++;
in.objcMethnameSection->addInput(isec);
} else {
- if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
- in.cStringSection->inputOrder = inputSectionsOrder++;
- in.cStringSection->addInput(isec);
+ auto *osec = in.getOrCreateCStringSection(
+ config->separateCstringLiteralSections ? isec->getName()
+ : section_names::cString);
+ if (osec->inputOrder == UnspecifiedInputOrder)
+ osec->inputOrder = inputSectionsOrder++;
+ osec->addInput(isec);
}
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f3e221a700b14..5e88e19697d67 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,7 +239,9 @@ void macho::writeMapFile() {
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
- } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+ } else if (any_of(in.cStringSectionMap,
+ [&](auto &it) { return osec == it.getValue(); }) ||
+ osec == in.objcMethnameSection) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
// is a sentinel value
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index ab7f73c3a1df6..794b92d2c9d40 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
newStringSec->splitIntoPieces();
newStringSec->pieces[0].live = true;
newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
- in.cStringSection->addInput(newStringSec);
+ in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec);
assert(newStringSec->pieces.size() == 1);
Defined *catNameSym = make<Defined>(
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 8ae50f380741a..4eeb8fbe11121 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">,
def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
HelpText<"Ignore LC_LINKER_OPTIONs">,
Group<grp_rare>;
+defm separate_cstring_literal_sections
+ : BB<"separate-cstring-literal-sections",
+ "Emit all cstring literals into their respective sections defined by "
+ "their section names.",
+ "Emit all cstring literals into the __cstring section. As a special "
+ "case, the __objc_methname section will still be emitted. (default)">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5796b0790c83a..130b2d73af810 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,7 +843,7 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
- CStringSection *cStringSection = nullptr;
+ llvm::StringMap<CStringSection *> cStringSectionMap;
DeduplicatedCStringSection *objcMethnameSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
RebaseSection *rebase = nullptr;
@@ -863,6 +863,21 @@ struct InStruct {
InitOffsetsSection *initOffsets = nullptr;
ObjCMethListSection *objcMethList = nullptr;
ChainedFixupsSection *chainedFixups = nullptr;
+
+ CStringSection *getOrCreateCStringSection(StringRef name) {
+ auto it = cStringSectionMap.find(name);
+ if (it != cStringSectionMap.end())
+ return it->getValue();
+
+ std::string &nameData = *make<std::string>(name);
+ CStringSection *sec;
+ if (config->dedupStrings)
+ sec = make<DeduplicatedCStringSection>(nameData.c_str());
+ else
+ sec = make<CStringSection>(nameData.c_str());
+ cStringSectionMap[name] = sec;
+ return sec;
+ }
};
extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f288fadc0d14f..59b2264a7f1ab 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,11 +1377,8 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
- if (config->dedupStrings)
- in.cStringSection =
- make<DeduplicatedCStringSection>(section_names::cString);
- else
- in.cStringSection = make<CStringSection>(section_names::cString);
+ // Materialize the cstring section
+ in.getOrCreateCStringSection(section_names::cString);
in.objcMethnameSection =
make<DeduplicatedCStringSection>(section_names::objcMethname);
in.wordLiteralSection = make<WordLiteralSection>();
diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll
new file mode 100644
index 0000000000000..4f82736b0a5f0
--- /dev/null
+++ b/lld/test/MachO/cstring.ll
@@ -0,0 +1,32 @@
+; REQUIRES: aarch64
+; RUN: llvm-as %s -o %t.o
+
+; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
+; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+
+; CHECK-DAG: __cstring
+; CHECK-DAG: __new_sec
+; CHECK-DAG: __objc_classname
+; CHECK-DAG: __objc_methname
+; CHECK-DAG: __objc_methtype
+
+; CSTR-DAG: __cstring
+; CSTR-DAG: __objc_methname
+
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+
+@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
+@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
+@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+
+@llvm.compiler.used = appending global [5 x ptr] [
+ ptr @.str,
+ ptr @.str1,
+ ptr @OBJC_METH_VAR_NAME_,
+ ptr @OBJC_CLASS_NAME_,
+ ptr @OBJC_METH_VAR_TYPE_
+]
>From 20e7d1a09bc6a5e36c1abaf55865b2b1031a14bd Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Thu, 18 Sep 2025 11:44:36 -0700
Subject: [PATCH 2/2] use vector to store cstring sections
---
lld/MachO/Driver.cpp | 3 +--
lld/MachO/InputSection.cpp | 19 +++++++------------
lld/MachO/MapFile.cpp | 4 +---
lld/MachO/ObjC.cpp | 2 +-
lld/MachO/SyntheticSections.h | 20 +++++++++++++-------
lld/MachO/Writer.cpp | 9 +++++----
6 files changed, 28 insertions(+), 29 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f54b6bbdc155c..c1a4c0443933a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1520,9 +1520,8 @@ static void foldIdenticalLiterals() {
// We always create a cStringSection, regardless of whether dedupLiterals is
// true. If it isn't, we simply create a non-deduplicating CStringSection.
// Either way, we must unconditionally finalize it here.
- for (auto &[name, sec] : in.cStringSectionMap)
+ for (auto *sec : in.cStringSections)
sec->finalizeContents();
- in.objcMethnameSection->finalizeContents();
in.wordLiteralSection->finalizeContents();
}
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index b7718db45aef6..b173e14cc86a8 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -63,18 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) {
isec->parent = osec;
inputSections.push_back(isec);
} else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
- if (isec->getName() == section_names::objcMethname) {
- if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
- in.objcMethnameSection->inputOrder = inputSectionsOrder++;
- in.objcMethnameSection->addInput(isec);
- } else {
- auto *osec = in.getOrCreateCStringSection(
- config->separateCstringLiteralSections ? isec->getName()
- : section_names::cString);
- if (osec->inputOrder == UnspecifiedInputOrder)
- osec->inputOrder = inputSectionsOrder++;
- osec->addInput(isec);
- }
+ bool useSectionName = config->separateCstringLiteralSections ||
+ isec->getName() == section_names::objcMethname;
+ auto *osec = in.getOrCreateCStringSection(
+ useSectionName ? isec->getName() : section_names::cString);
+ if (osec->inputOrder == UnspecifiedInputOrder)
+ osec->inputOrder = inputSectionsOrder++;
+ osec->addInput(isec);
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
in.wordLiteralSection->inputOrder = inputSectionsOrder++;
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 5e88e19697d67..29ebcdcf9a832 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,9 +239,7 @@ void macho::writeMapFile() {
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
- } else if (any_of(in.cStringSectionMap,
- [&](auto &it) { return osec == it.getValue(); }) ||
- osec == in.objcMethnameSection) {
+ } else if (is_contained(in.cStringSections, osec)) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
// is a sentinel value
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 794b92d2c9d40..ab7f73c3a1df6 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
newStringSec->splitIntoPieces();
newStringSec->pieces[0].live = true;
newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
- in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec);
+ in.cStringSection->addInput(newStringSec);
assert(newStringSec->pieces.size() == 1);
Defined *catNameSym = make<Defined>(
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 130b2d73af810..ffc6202ae2191 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,7 +843,8 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
- llvm::StringMap<CStringSection *> cStringSectionMap;
+ llvm::SmallVector<CStringSection *> cStringSections;
+ CStringSection *cStringSection = nullptr;
DeduplicatedCStringSection *objcMethnameSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
RebaseSection *rebase = nullptr;
@@ -864,20 +865,25 @@ struct InStruct {
ObjCMethListSection *objcMethList = nullptr;
ChainedFixupsSection *chainedFixups = nullptr;
- CStringSection *getOrCreateCStringSection(StringRef name) {
- auto it = cStringSectionMap.find(name);
- if (it != cStringSectionMap.end())
- return it->getValue();
+ CStringSection *getOrCreateCStringSection(StringRef name,
+ bool forceDedupStrings = false) {
+ auto [it, didEmplace] =
+ cStringSectionMap.try_emplace(name, cStringSections.size());
+ if (!didEmplace)
+ return cStringSections[it->getValue()];
std::string &nameData = *make<std::string>(name);
CStringSection *sec;
- if (config->dedupStrings)
+ if (config->dedupStrings || forceDedupStrings)
sec = make<DeduplicatedCStringSection>(nameData.c_str());
else
sec = make<CStringSection>(nameData.c_str());
- cStringSectionMap[name] = sec;
+ cStringSections.push_back(sec);
return sec;
}
+
+private:
+ llvm::StringMap<unsigned> cStringSectionMap;
};
extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 59b2264a7f1ab..995792be41747 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,10 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
- // Materialize the cstring section
- in.getOrCreateCStringSection(section_names::cString);
- in.objcMethnameSection =
- make<DeduplicatedCStringSection>(section_names::objcMethname);
+ // Materialize cstring and objcMethname sections
+ in.cStringSection = in.getOrCreateCStringSection(section_names::cString);
+ in.objcMethnameSection = cast<DeduplicatedCStringSection>(
+ in.getOrCreateCStringSection(section_names::objcMethname,
+ /*forceDedupStrings=*/true));
in.wordLiteralSection = make<WordLiteralSection>();
if (config->emitChainedFixups) {
in.chainedFixups = make<ChainedFixupsSection>();
More information about the llvm-commits
mailing list