[lld] [LLD][MachO] Option to emit separate cstring sections (PR #158720)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 18 11:45:04 PDT 2025


https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/158720

>From c9ee8b2867fa961932bdddc2425e6ed96564a175 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Mon, 15 Sep 2025 12:10:35 -0700
Subject: [PATCH 1/2] [LLD][MachO] Option to emit separate cstring sections

---
 lld/MachO/Config.h            |  1 +
 lld/MachO/Driver.cpp          |  6 +++++-
 lld/MachO/InputSection.cpp    |  9 ++++++---
 lld/MachO/MapFile.cpp         |  4 +++-
 lld/MachO/ObjC.cpp            |  2 +-
 lld/MachO/Options.td          |  7 +++++++
 lld/MachO/SyntheticSections.h | 17 ++++++++++++++++-
 lld/MachO/Writer.cpp          |  7 ++-----
 lld/test/MachO/cstring.ll     | 32 ++++++++++++++++++++++++++++++++
 9 files changed, 73 insertions(+), 12 deletions(-)
 create mode 100644 lld/test/MachO/cstring.ll

diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 19dba790c1c7c..51b1363d87615 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -222,6 +222,7 @@ struct Configuration {
   bool pgoWarnMismatch;
   bool warnThinArchiveMissingMembers;
   bool disableVerify;
+  bool separateCstringLiteralSections;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3db638e1ead96..f54b6bbdc155c 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1520,7 +1520,8 @@ static void foldIdenticalLiterals() {
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
   // Either way, we must unconditionally finalize it here.
-  in.cStringSection->finalizeContents();
+  for (auto &[name, sec] : in.cStringSectionMap)
+    sec->finalizeContents();
   in.objcMethnameSection->finalizeContents();
   in.wordLiteralSection->finalizeContents();
 }
@@ -1981,6 +1982,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
                    OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
   config->disableVerify = args.hasArg(OPT_disable_verify);
+  config->separateCstringLiteralSections =
+      args.hasFlag(OPT_separate_cstring_literal_sections,
+                   OPT_no_separate_cstring_literal_sections, false);
 
   auto IncompatWithCGSort = [&](StringRef firstArgStr) {
     // Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 18b3ff961085b..b7718db45aef6 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -68,9 +68,12 @@ void lld::macho::addInputSection(InputSection *inputSection) {
         in.objcMethnameSection->inputOrder = inputSectionsOrder++;
       in.objcMethnameSection->addInput(isec);
     } else {
-      if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
-        in.cStringSection->inputOrder = inputSectionsOrder++;
-      in.cStringSection->addInput(isec);
+      auto *osec = in.getOrCreateCStringSection(
+          config->separateCstringLiteralSections ? isec->getName()
+                                                 : section_names::cString);
+      if (osec->inputOrder == UnspecifiedInputOrder)
+        osec->inputOrder = inputSectionsOrder++;
+      osec->addInput(isec);
     }
   } else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
     if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f3e221a700b14..5e88e19697d67 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,7 +239,9 @@ void macho::writeMapFile() {
         printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
       } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
-      } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+      } else if (any_of(in.cStringSectionMap,
+                        [&](auto &it) { return osec == it.getValue(); }) ||
+                 osec == in.objcMethnameSection) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
         uint64_t lastAddr = 0; // strings will never start at address 0, so this
                                // is a sentinel value
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index ab7f73c3a1df6..794b92d2c9d40 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
   newStringSec->splitIntoPieces();
   newStringSec->pieces[0].live = true;
   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
-  in.cStringSection->addInput(newStringSec);
+  in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec);
   assert(newStringSec->pieces.size() == 1);
 
   Defined *catNameSym = make<Defined>(
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 8ae50f380741a..4eeb8fbe11121 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">,
 def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
     HelpText<"Ignore LC_LINKER_OPTIONs">,
     Group<grp_rare>;
+defm separate_cstring_literal_sections
+    : BB<"separate-cstring-literal-sections",
+         "Emit all cstring literals into their respective sections defined by "
+         "their section names.",
+         "Emit all cstring literals into the __cstring section. As a special "
+         "case, the __objc_methname section will still be emitted. (default)">,
+      Group<grp_rare>;
 
 def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5796b0790c83a..130b2d73af810 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,7 +843,7 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
 struct InStruct {
   const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
-  CStringSection *cStringSection = nullptr;
+  llvm::StringMap<CStringSection *> cStringSectionMap;
   DeduplicatedCStringSection *objcMethnameSection = nullptr;
   WordLiteralSection *wordLiteralSection = nullptr;
   RebaseSection *rebase = nullptr;
@@ -863,6 +863,21 @@ struct InStruct {
   InitOffsetsSection *initOffsets = nullptr;
   ObjCMethListSection *objcMethList = nullptr;
   ChainedFixupsSection *chainedFixups = nullptr;
+
+  CStringSection *getOrCreateCStringSection(StringRef name) {
+    auto it = cStringSectionMap.find(name);
+    if (it != cStringSectionMap.end())
+      return it->getValue();
+
+    std::string &nameData = *make<std::string>(name);
+    CStringSection *sec;
+    if (config->dedupStrings)
+      sec = make<DeduplicatedCStringSection>(nameData.c_str());
+    else
+      sec = make<CStringSection>(nameData.c_str());
+    cStringSectionMap[name] = sec;
+    return sec;
+  }
 };
 
 extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f288fadc0d14f..59b2264a7f1ab 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,11 +1377,8 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
 
 void macho::createSyntheticSections() {
   in.header = make<MachHeaderSection>();
-  if (config->dedupStrings)
-    in.cStringSection =
-        make<DeduplicatedCStringSection>(section_names::cString);
-  else
-    in.cStringSection = make<CStringSection>(section_names::cString);
+  // Materialize the cstring section
+  in.getOrCreateCStringSection(section_names::cString);
   in.objcMethnameSection =
       make<DeduplicatedCStringSection>(section_names::objcMethname);
   in.wordLiteralSection = make<WordLiteralSection>();
diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll
new file mode 100644
index 0000000000000..4f82736b0a5f0
--- /dev/null
+++ b/lld/test/MachO/cstring.ll
@@ -0,0 +1,32 @@
+; REQUIRES: x86
+; RUN: llvm-as %s -o %t.o
+
+; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
+; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+; Separated: each cstring_literals section keeps its own name. Merged (default): __cstring and __objc_methname must be present.
+; CHECK-DAG: __cstring
+; CHECK-DAG: __new_sec
+; CHECK-DAG: __objc_classname
+; CHECK-DAG: __objc_methname
+; CHECK-DAG: __objc_methtype
+
+; CSTR-DAG: __cstring
+; CSTR-DAG: __objc_methname
+
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+
+@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
+@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
+@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+
+@llvm.compiler.used = appending global [5 x ptr] [
+  ptr @.str,
+  ptr @.str1,
+  ptr @OBJC_METH_VAR_NAME_,
+  ptr @OBJC_CLASS_NAME_,
+  ptr @OBJC_METH_VAR_TYPE_
+]

>From 20e7d1a09bc6a5e36c1abaf55865b2b1031a14bd Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Thu, 18 Sep 2025 11:44:36 -0700
Subject: [PATCH 2/2] use vector to store cstring sections

---
 lld/MachO/Driver.cpp          |  3 +--
 lld/MachO/InputSection.cpp    | 19 +++++++------------
 lld/MachO/MapFile.cpp         |  4 +---
 lld/MachO/ObjC.cpp            |  2 +-
 lld/MachO/SyntheticSections.h | 20 +++++++++++++-------
 lld/MachO/Writer.cpp          |  9 +++++----
 6 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f54b6bbdc155c..c1a4c0443933a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1520,9 +1520,8 @@ static void foldIdenticalLiterals() {
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
   // Either way, we must unconditionally finalize it here.
-  for (auto &[name, sec] : in.cStringSectionMap)
+  for (auto *sec : in.cStringSections)
     sec->finalizeContents();
-  in.objcMethnameSection->finalizeContents();
   in.wordLiteralSection->finalizeContents();
 }
 
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index b7718db45aef6..b173e14cc86a8 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -63,18 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) {
     isec->parent = osec;
     inputSections.push_back(isec);
   } else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
-    if (isec->getName() == section_names::objcMethname) {
-      if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
-        in.objcMethnameSection->inputOrder = inputSectionsOrder++;
-      in.objcMethnameSection->addInput(isec);
-    } else {
-      auto *osec = in.getOrCreateCStringSection(
-          config->separateCstringLiteralSections ? isec->getName()
-                                                 : section_names::cString);
-      if (osec->inputOrder == UnspecifiedInputOrder)
-        osec->inputOrder = inputSectionsOrder++;
-      osec->addInput(isec);
-    }
+    bool useSectionName = config->separateCstringLiteralSections ||
+                          isec->getName() == section_names::objcMethname;
+    auto *osec = in.getOrCreateCStringSection(
+        useSectionName ? isec->getName() : section_names::cString);
+    if (osec->inputOrder == UnspecifiedInputOrder)
+      osec->inputOrder = inputSectionsOrder++;
+    osec->addInput(isec);
   } else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
     if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
       in.wordLiteralSection->inputOrder = inputSectionsOrder++;
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 5e88e19697d67..29ebcdcf9a832 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,9 +239,7 @@ void macho::writeMapFile() {
         printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
       } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
-      } else if (any_of(in.cStringSectionMap,
-                        [&](auto &it) { return osec == it.getValue(); }) ||
-                 osec == in.objcMethnameSection) {
+      } else if (is_contained(in.cStringSections, osec)) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
         uint64_t lastAddr = 0; // strings will never start at address 0, so this
                                // is a sentinel value
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 794b92d2c9d40..ab7f73c3a1df6 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
   newStringSec->splitIntoPieces();
   newStringSec->pieces[0].live = true;
   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
-  in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec);
+  in.cStringSection->addInput(newStringSec);
   assert(newStringSec->pieces.size() == 1);
 
   Defined *catNameSym = make<Defined>(
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 130b2d73af810..ffc6202ae2191 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,7 +843,8 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
 struct InStruct {
   const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
-  llvm::StringMap<CStringSection *> cStringSectionMap;
+  llvm::SmallVector<CStringSection *> cStringSections;
+  CStringSection *cStringSection = nullptr;
   DeduplicatedCStringSection *objcMethnameSection = nullptr;
   WordLiteralSection *wordLiteralSection = nullptr;
   RebaseSection *rebase = nullptr;
@@ -864,20 +865,25 @@ struct InStruct {
   ObjCMethListSection *objcMethList = nullptr;
   ChainedFixupsSection *chainedFixups = nullptr;
 
-  CStringSection *getOrCreateCStringSection(StringRef name) {
-    auto it = cStringSectionMap.find(name);
-    if (it != cStringSectionMap.end())
-      return it->getValue();
+  CStringSection *getOrCreateCStringSection(StringRef name,
+                                            bool forceDedupStrings = false) {
+    auto [it, didEmplace] =
+        cStringSectionMap.try_emplace(name, cStringSections.size());
+    if (!didEmplace)
+      return cStringSections[it->getValue()];
 
     std::string &nameData = *make<std::string>(name);
     CStringSection *sec;
-    if (config->dedupStrings)
+    if (config->dedupStrings || forceDedupStrings)
       sec = make<DeduplicatedCStringSection>(nameData.c_str());
     else
       sec = make<CStringSection>(nameData.c_str());
-    cStringSectionMap[name] = sec;
+    cStringSections.push_back(sec);
     return sec;
   }
+
+private:
+  llvm::StringMap<unsigned> cStringSectionMap;
 };
 
 extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 59b2264a7f1ab..995792be41747 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,10 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
 
 void macho::createSyntheticSections() {
   in.header = make<MachHeaderSection>();
-  // Materialize the cstring section
-  in.getOrCreateCStringSection(section_names::cString);
-  in.objcMethnameSection =
-      make<DeduplicatedCStringSection>(section_names::objcMethname);
+  // Materialize cstring and objcMethname sections
+  in.cStringSection = in.getOrCreateCStringSection(section_names::cString);
+  in.objcMethnameSection = cast<DeduplicatedCStringSection>(
+      in.getOrCreateCStringSection(section_names::objcMethname,
+                                   /*forceDedupStrings=*/true));
   in.wordLiteralSection = make<WordLiteralSection>();
   if (config->emitChainedFixups) {
     in.chainedFixups = make<ChainedFixupsSection>();



More information about the llvm-commits mailing list