[lld] [lld][MachO] Tail merge strings (PR #161262)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 1 14:25:09 PDT 2025


https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/161262

>From 2e997dba5502eec845f0371e0aef083281f77fc4 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 10:20:28 -0700
Subject: [PATCH 1/9] [lld][macho][NFC] Factor count zeros into helper function

---
 lld/MachO/SyntheticSections.cpp | 57 +++++++++++++++++----------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 228b84db21c2a..d38b6c9e00157 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1685,31 +1685,7 @@ void CStringSection::writeTo(uint8_t *buf) const {
   }
 }
 
-void CStringSection::finalizeContents() {
-  uint64_t offset = 0;
-  // TODO: Call buildCStringPriorities() to support cstring ordering when
-  // deduplication is off, although this may negatively impact build
-  // performance.
-  for (CStringInputSection *isec : inputs) {
-    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
-      if (!piece.live)
-        continue;
-      // See comment above DeduplicatedCStringSection for how alignment is
-      // handled.
-      uint32_t pieceAlign = 1
-                            << llvm::countr_zero(isec->align | piece.inSecOff);
-      offset = alignToPowerOf2(offset, pieceAlign);
-      piece.outSecOff = offset;
-      isec->isFinal = true;
-      StringRef string = isec->getStringRef(i);
-      offset += string.size() + 1; // account for null terminator
-    }
-  }
-  size = offset;
-}
-
-// Mergeable cstring literals are found under the __TEXT,__cstring section. In
-// contrast to ELF, which puts strings that need different alignments into
+// In contrast to ELF, which puts strings that need different alignments into
 // different sections, clang's Mach-O backend puts them all in one section.
 // Strings that need to be aligned have the .p2align directive emitted before
 // them, which simply translates into zero padding in the object file. In other
@@ -1744,8 +1720,33 @@ void CStringSection::finalizeContents() {
 // requires its operand addresses to be 16-byte aligned). However, there will
 // typically also be other cstrings in the same file that aren't used via SIMD
 // and don't need this alignment. They will be emitted at some arbitrary address
-// `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
-// % A`.
+// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
+// `16 % A`.
+static uint8_t getStringPieceAlignment(const CStringInputSection *isec,
+                                     const StringPiece &piece) {
+  return llvm::countr_zero(isec->align | piece.inSecOff);
+}
+
+void CStringSection::finalizeContents() {
+  uint64_t offset = 0;
+  // TODO: Call buildCStringPriorities() to support cstring ordering when
+  // deduplication is off, although this may negatively impact build
+  // performance.
+  for (CStringInputSection *isec : inputs) {
+    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+      if (!piece.live)
+        continue;
+      uint32_t pieceAlign = 1 << getStringPieceAlignment(isec, piece);
+      offset = alignToPowerOf2(offset, pieceAlign);
+      piece.outSecOff = offset;
+      isec->isFinal = true;
+      StringRef string = isec->getStringRef(i);
+      offset += string.size() + 1; // account for null terminator
+    }
+  }
+  size = offset;
+}
+
 void DeduplicatedCStringSection::finalizeContents() {
   // Find the largest alignment required for each string.
   for (const CStringInputSection *isec : inputs) {
@@ -1754,7 +1755,7 @@ void DeduplicatedCStringSection::finalizeContents() {
         continue;
       auto s = isec->getCachedHashStringRef(i);
       assert(isec->align != 0);
-      uint8_t trailingZeros = llvm::countr_zero(isec->align | piece.inSecOff);
+      uint8_t trailingZeros = getStringPieceAlignment(isec, piece);
       auto it = stringOffsetMap.insert(
           std::make_pair(s, StringOffset(trailingZeros)));
       if (!it.second && it.first->second.trailingZeros < trailingZeros)

>From ee87e104056cc89a9d5e151edefca335aa823909 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 10:47:52 -0700
Subject: [PATCH 2/9] fix format

---
 lld/MachO/SyntheticSections.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index d38b6c9e00157..5645d8a05a28f 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1723,7 +1723,7 @@ void CStringSection::writeTo(uint8_t *buf) const {
 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of
 // `16 % A`.
 static uint8_t getStringPieceAlignment(const CStringInputSection *isec,
-                                     const StringPiece &piece) {
+                                       const StringPiece &piece) {
   return llvm::countr_zero(isec->align | piece.inSecOff);
 }
 

>From d47f1b9a1075e9588e0bc54afb2da33838ba71b1 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 11:03:21 -0700
Subject: [PATCH 3/9] [lld][MachO] Use llvm::Align and remove StringOffset type

---
 lld/MachO/SyntheticSections.cpp | 64 ++++++++++++++-------------------
 lld/MachO/SyntheticSections.h   | 12 ++-----
 2 files changed, 28 insertions(+), 48 deletions(-)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 5645d8a05a28f..38386c107fea0 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -848,8 +848,7 @@ void ObjCSelRefsHelper::initialize() {
 void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); }
 
 ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) {
-  auto methnameOffset =
-      in.objcMethnameSection->getStringOffset(methname).outSecOff;
+  auto methnameOffset = in.objcMethnameSection->getStringOffset(methname);
 
   size_t wordSize = target->wordSize;
   uint8_t *selrefData = bAlloc().Allocate<uint8_t>(wordSize);
@@ -1722,13 +1721,12 @@ void CStringSection::writeTo(uint8_t *buf) const {
 // and don't need this alignment. They will be emitted at some arbitrary address
 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of
 // `16 % A`.
-static uint8_t getStringPieceAlignment(const CStringInputSection *isec,
-                                       const StringPiece &piece) {
-  return llvm::countr_zero(isec->align | piece.inSecOff);
+static Align getStringPieceAlignment(const CStringInputSection *isec,
+                                     const StringPiece &piece) {
+  return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff));
 }
 
 void CStringSection::finalizeContents() {
-  uint64_t offset = 0;
   // TODO: Call buildCStringPriorities() to support cstring ordering when
   // deduplication is off, although this may negatively impact build
   // performance.
@@ -1736,30 +1734,27 @@ void CStringSection::finalizeContents() {
     for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
       if (!piece.live)
         continue;
-      uint32_t pieceAlign = 1 << getStringPieceAlignment(isec, piece);
-      offset = alignToPowerOf2(offset, pieceAlign);
-      piece.outSecOff = offset;
-      isec->isFinal = true;
+      piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
       StringRef string = isec->getStringRef(i);
-      offset += string.size() + 1; // account for null terminator
+      size = piece.outSecOff + string.size() + 1; // account for null terminator
     }
+    isec->isFinal = true;
   }
-  size = offset;
 }
 
 void DeduplicatedCStringSection::finalizeContents() {
   // Find the largest alignment required for each string.
+  DenseMap<CachedHashStringRef, Align> strToAlignment;
   for (const CStringInputSection *isec : inputs) {
     for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
       if (!piece.live)
         continue;
       auto s = isec->getCachedHashStringRef(i);
       assert(isec->align != 0);
-      uint8_t trailingZeros = getStringPieceAlignment(isec, piece);
-      auto it = stringOffsetMap.insert(
-          std::make_pair(s, StringOffset(trailingZeros)));
-      if (!it.second && it.first->second.trailingZeros < trailingZeros)
-        it.first->second.trailingZeros = trailingZeros;
+      auto align = getStringPieceAlignment(isec, piece);
+      auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+      if (!wasInserted && it->second < align)
+        it->second = align;
     }
   }
 
@@ -1769,38 +1764,31 @@ void DeduplicatedCStringSection::finalizeContents() {
   for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
     auto &piece = isec->pieces[i];
     auto s = isec->getCachedHashStringRef(i);
-    auto it = stringOffsetMap.find(s);
-    assert(it != stringOffsetMap.end());
-    lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo =
-        it->second;
-    if (offsetInfo.outSecOff == UINT64_MAX) {
-      offsetInfo.outSecOff =
-          alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros);
-      size = offsetInfo.outSecOff + s.size() + 1; // account for null terminator
+    auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
+    if (wasInserted) {
+      // Avoid computing the offset until we are sure we will need to
+      uint64_t offset = alignTo(size, strToAlignment.at(s));
+      it->second = offset;
+      size = offset + s.size() + 1; // account for null terminator
     }
-    piece.outSecOff = offsetInfo.outSecOff;
+    // If the string was already in stringOffsetMap, it is a duplicate and we
+    // only need to assign the offset.
+    piece.outSecOff = it->second;
   }
   for (CStringInputSection *isec : inputs)
     isec->isFinal = true;
 }
 
 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
-  for (const auto &p : stringOffsetMap) {
-    StringRef data = p.first.val();
-    uint64_t off = p.second.outSecOff;
-    if (!data.empty())
-      memcpy(buf + off, data.data(), data.size());
-  }
+  for (const auto &[s, outSecOff] : stringOffsetMap)
+    if (s.size())
+      memcpy(buf + outSecOff, s.data(), s.size());
 }
 
-DeduplicatedCStringSection::StringOffset
-DeduplicatedCStringSection::getStringOffset(StringRef str) const {
+uint64_t DeduplicatedCStringSection::getStringOffset(StringRef str) const {
   // StringPiece uses 31 bits to store the hashes, so we replicate that
   uint32_t hash = xxh3_64bits(str) & 0x7fffffff;
-  auto offset = stringOffsetMap.find(CachedHashStringRef(str, hash));
-  assert(offset != stringOffsetMap.end() &&
-         "Looked-up strings should always exist in section");
-  return offset->second;
+  return stringOffsetMap.at(CachedHashStringRef(str, hash));
 }
 
 // This section is actually emitted as __TEXT,__const by ld64, but clang may
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 1abf3c210a64e..a37dd66107ee7 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -571,18 +571,10 @@ class DeduplicatedCStringSection final : public CStringSection {
   uint64_t getSize() const override { return size; }
   void finalizeContents() override;
   void writeTo(uint8_t *buf) const override;
-
-  struct StringOffset {
-    uint8_t trailingZeros;
-    uint64_t outSecOff = UINT64_MAX;
-
-    explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
-  };
-
-  StringOffset getStringOffset(StringRef str) const;
+  uint64_t getStringOffset(StringRef str) const;
 
 private:
-  llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
+  llvm::DenseMap<llvm::CachedHashStringRef, uint64_t> stringOffsetMap;
   size_t size = 0;
 };
 

>From 4e0bde29f824a1dc2cfce4a64ad052783e80db11 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 12:01:51 -0700
Subject: [PATCH 4/9] init size to zero

---
 lld/MachO/SyntheticSections.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 38386c107fea0..903ba78a27c75 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1727,6 +1727,7 @@ static Align getStringPieceAlignment(const CStringInputSection *isec,
 }
 
 void CStringSection::finalizeContents() {
+  size = 0;
   // TODO: Call buildCStringPriorities() to support cstring ordering when
   // deduplication is off, although this may negatively impact build
   // performance.

>From 4742cca2ec71b6651f76c12b9d4aea2706cec02c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 12:21:49 -0700
Subject: [PATCH 5/9] [lld][MachO] Tail merge strings

---
 lld/MachO/SyntheticSections.cpp    | 58 +++++++++++++++++++-
 lld/test/MachO/cstring-dedup.s     |  3 +-
 lld/test/MachO/cstring-tailmerge.s | 85 ++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 3 deletions(-)
 create mode 100644 lld/test/MachO/cstring-tailmerge.s

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 903ba78a27c75..460a0b5a16ab0 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() {
 void DeduplicatedCStringSection::finalizeContents() {
   // Find the largest alignment required for each string.
   DenseMap<CachedHashStringRef, Align> strToAlignment;
+  std::vector<CachedHashStringRef> deduplicatedStrs;
   for (const CStringInputSection *isec : inputs) {
     for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
       if (!piece.live)
@@ -1754,17 +1755,57 @@ void DeduplicatedCStringSection::finalizeContents() {
       assert(isec->align != 0);
       auto align = getStringPieceAlignment(isec, piece);
       auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+      if (wasInserted)
+        deduplicatedStrs.push_back(s);
       if (!wasInserted && it->second < align)
         it->second = align;
     }
   }
 
+  // Like lexicographical sort, except we read strings in reverse and take the
+  // longest string first
+  // TODO: We could improve performance by implementing our own sort that avoids
+  // comparing characters we know to be the same. See
+  // StringTableBuilder::multikeySort() for details
+  llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
+    for (const auto &[leftChar, rightChar] :
+         llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
+      if (leftChar == rightChar)
+        continue;
+      return leftChar < rightChar;
+    }
+    return left.size() > right.size();
+  });
+  std::optional<CachedHashStringRef> mergeCandidate;
+  DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
+      tailMergeMap;
+  for (auto &s : deduplicatedStrs) {
+    if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
+      mergeCandidate = s;
+      continue;
+    }
+    uint64_t tailOffset = mergeCandidate->size() - s.size();
+    // TODO: If the tail offset is incompatible with this string's alignment, we
+    // might be able to find another superstring with a compatible tail offset.
+    // The difficulty is how to do this efficiently
+    const auto &align = strToAlignment.at(s);
+    if (!isAligned(align, tailOffset))
+      continue;
+    auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
+    if (align > mergeCandidateAlign)
+      mergeCandidateAlign = align;
+    tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset);
+  }
+
   // Sort the strings for performance and compression size win, and then
   // assign an offset for each string and save it to the corresponding
   // StringPieces for easy access.
   for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
     auto &piece = isec->pieces[i];
     auto s = isec->getCachedHashStringRef(i);
+    // Skip tail merged strings until their superstring offsets are resolved
+    if (tailMergeMap.count(s))
+      continue;
     auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
     if (wasInserted) {
       // Avoid computing the offset until we are sure we will need to
@@ -1776,8 +1817,23 @@ void DeduplicatedCStringSection::finalizeContents() {
     // only need to assign the offset.
     piece.outSecOff = it->second;
   }
-  for (CStringInputSection *isec : inputs)
+  for (CStringInputSection *isec : inputs) {
+    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+      if (!piece.live)
+        continue;
+      auto s = isec->getCachedHashStringRef(i);
+      auto it = tailMergeMap.find(s);
+      if (it == tailMergeMap.end())
+        continue;
+      const auto &[superString, tailOffset] = it->second;
+      assert(!tailMergeMap.count(superString));
+      auto &outSecOff = stringOffsetMap[s];
+      outSecOff = stringOffsetMap.at(superString) + tailOffset;
+      piece.outSecOff = outSecOff;
+      assert(isAligned(strToAlignment.at(s), piece.outSecOff));
+    }
     isec->isFinal = true;
+  }
 }
 
 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s
index a4b15f26afff0..0a42b3d6fcff3 100644
--- a/lld/test/MachO/cstring-dedup.s
+++ b/lld/test/MachO/cstring-dedup.s
@@ -8,11 +8,10 @@
 # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
 # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
 
-## Make sure we only have 3 deduplicated strings in __cstring.
+## Make sure we only have 2 deduplicated strings in __cstring.
 # STR: Contents of (__TEXT,__cstring) section
 # STR: {{[[:xdigit:]]+}} foo
 # STR: {{[[:xdigit:]]+}} barbaz
-# STR: {{[[:xdigit:]]+}} {{$}}
 
 ## Make sure both symbol and section relocations point to the right thing.
 # CHECK:      Contents of (__DATA,ptrs) section
diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s
new file mode 100644
index 0000000000000..83d2810a78139
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge.s
@@ -0,0 +1,85 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
+# RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
+# RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
+
+# CHECK: Contents of (__TEXT,__cstring) section
+# CHECK: [[#%.16x,START:]] get awkward offset{{$}}
+
+# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
+# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
+
+# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
+
+# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
+
+# CHECK: SYMBOL TABLE:
+
+# ALIGN1: [[#%.16x,START+19]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN1: [[#%.16x,START+21]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN1: [[#%.16x,START+26]] l     O __TEXT,__cstring _longstr
+# ALIGN1: [[#%.16x,START+34]] l     O __TEXT,__cstring _otherstr
+# ALIGN1: [[#%.16x,START+39]] l     O __TEXT,__cstring _str
+
+# ALIGN2: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN2: [[#%.16x,START+20+2]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN2: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _longstr
+# ALIGN2: [[#%.16x,START+20+16+8]] l     O __TEXT,__cstring _otherstr
+# ALIGN2: [[#%.16x,START+20+16+8+10]] l     O __TEXT,__cstring _str
+
+# ALIGN4: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN4: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN4: [[#%.16x,START+20+16+16]] l     O __TEXT,__cstring _longstr
+# ALIGN4: [[#%.16x,START+20+16+16+8]] l     O __TEXT,__cstring _otherstr
+# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l     O __TEXT,__cstring _str
+
+#--- first.s
+.cstring
+.p2align 2
+.asciz "get awkward offset"  # length = 19
+
+#--- align.s.template
+.cstring
+
+.p2align <ALIGN>
+  _myotherlongstr:
+.asciz "myotherlongstr"      # length = 15
+
+.p2align <ALIGN>
+  _otherlongstr:
+.asciz   "otherlongstr"      # length = 13, tail offset = 2
+
+.p2align <ALIGN>
+  _longstr:
+.asciz        "longstr"      # length = 8, tail offset = 7
+
+.p2align <ALIGN>
+  _otherstr:
+.asciz       "otherstr"      # length = 9
+
+.p2align <ALIGN>
+  _str:
+.asciz            "str"      # length = 4, tail offset = 5

>From 73693d673d5b4b2ce4588248b4acaf870ffb6c01 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 16:04:17 -0700
Subject: [PATCH 6/9] add to release notes

---
 lld/docs/ReleaseNotes.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6ea1ea0fd6c2f..fa2247d64b690 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -44,6 +44,9 @@ MinGW Improvements
 MachO Improvements
 ------------------
 
+* cstrings sections are now tail merged
+  (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
+
 WebAssembly Improvements
 ------------------------
 

>From 6a1da5f20f915b07275ed14f8259aa5e144f72e0 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 16:10:00 -0700
Subject: [PATCH 7/9] add assert

---
 lld/MachO/SyntheticSections.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 460a0b5a16ab0..edb90fe7fcbcc 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1826,6 +1826,7 @@ void DeduplicatedCStringSection::finalizeContents() {
       if (it == tailMergeMap.end())
         continue;
       const auto &[superString, tailOffset] = it->second;
+      assert(superString.val().ends_with(s.val()));
       assert(!tailMergeMap.count(superString));
       auto &outSecOff = stringOffsetMap[s];
       outSecOff = stringOffsetMap.at(superString) + tailOffset;

>From fdaca77c076b135333b75b75c767313eaded1f10 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Wed, 1 Oct 2025 14:15:17 -0700
Subject: [PATCH 8/9] order correctly and add more tests

---
 lld/MachO/Config.h                            |   1 +
 lld/MachO/Driver.cpp                          |   2 +
 lld/MachO/Options.td                          |   4 +
 lld/MachO/SyntheticSections.cpp               |  51 +++----
 lld/test/MachO/cstring-dedup.s                |   3 +-
 lld/test/MachO/cstring-tailmerge-objc.s       | 144 ++++++++++++++++++
 lld/test/MachO/cstring-tailmerge.s            | 104 ++++++-------
 lld/test/MachO/order-file-cstring-tailmerge.s |  56 +++++++
 8 files changed, 285 insertions(+), 80 deletions(-)
 create mode 100644 lld/test/MachO/cstring-tailmerge-objc.s
 create mode 100644 lld/test/MachO/order-file-cstring-tailmerge.s

diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 51b1363d87615..a2ca5770bf952 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -223,6 +223,7 @@ struct Configuration {
   bool warnThinArchiveMissingMembers;
   bool disableVerify;
   bool separateCstringLiteralSections;
+  bool tailMergeStrings;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7ce987e400a24..94f441b7643a7 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->separateCstringLiteralSections =
       args.hasFlag(OPT_separate_cstring_literal_sections,
                    OPT_no_separate_cstring_literal_sections, false);
+  config->tailMergeStrings =
+      args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
 
   auto IncompatWithCGSort = [&](StringRef firstArgStr) {
     // Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4eeb8fbe11121..be1a1cc2963d9 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections
          "Emit all cstring literals into the __cstring section. As a special "
          "case, the __objc_methname section will still be emitted. (default)">,
       Group<grp_rare>;
+defm tail_merge_strings
+    : BB<"tail-merge-strings", "Enable string tail merging",
+         "Disable string tail merging to improve link-time performance">,
+      Group<grp_rare>;
 
 def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
 
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index edb90fe7fcbcc..187cccbe90dbc 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() {
 void DeduplicatedCStringSection::finalizeContents() {
   // Find the largest alignment required for each string.
   DenseMap<CachedHashStringRef, Align> strToAlignment;
+  // Used for tail merging only
   std::vector<CachedHashStringRef> deduplicatedStrs;
   for (const CStringInputSection *isec : inputs) {
     for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
@@ -1755,7 +1756,7 @@ void DeduplicatedCStringSection::finalizeContents() {
       assert(isec->align != 0);
       auto align = getStringPieceAlignment(isec, piece);
       auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
-      if (wasInserted)
+      if (config->tailMergeStrings && wasInserted)
         deduplicatedStrs.push_back(s);
       if (!wasInserted && it->second < align)
         it->second = align;
@@ -1784,17 +1785,17 @@ void DeduplicatedCStringSection::finalizeContents() {
       mergeCandidate = s;
       continue;
     }
-    uint64_t tailOffset = mergeCandidate->size() - s.size();
+    uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
     // TODO: If the tail offset is incompatible with this string's alignment, we
     // might be able to find another superstring with a compatible tail offset.
     // The difficulty is how to do this efficiently
     const auto &align = strToAlignment.at(s);
-    if (!isAligned(align, tailOffset))
+    if (!isAligned(align, tailMergeOffset))
       continue;
     auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
     if (align > mergeCandidateAlign)
       mergeCandidateAlign = align;
-    tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset);
+    tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
   }
 
   // Sort the strings for performance and compression size win, and then
@@ -1803,9 +1804,18 @@ void DeduplicatedCStringSection::finalizeContents() {
   for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
     auto &piece = isec->pieces[i];
     auto s = isec->getCachedHashStringRef(i);
-    // Skip tail merged strings until their superstring offsets are resolved
-    if (tailMergeMap.count(s))
-      continue;
+    // Any string can be tail merged with itself with an offset of zero
+    uint64_t tailMergeOffset = 0;
+    auto mergeIt =
+        config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
+    if (mergeIt != tailMergeMap.end()) {
+      auto &[superString, offset] = mergeIt->second;
+      // s can be tail merged with superString. Do not lay out s. Instead, lay
+      // out superString if we haven't already
+      assert(superString.val().ends_with(s.val()));
+      s = superString;
+      tailMergeOffset = offset;
+    }
     auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
     if (wasInserted) {
       // Avoid computing the offset until we are sure we will need to
@@ -1813,28 +1823,15 @@ void DeduplicatedCStringSection::finalizeContents() {
       it->second = offset;
       size = offset + s.size() + 1; // account for null terminator
     }
-    // If the string was already in stringOffsetMap, it is a duplicate and we
-    // only need to assign the offset.
-    piece.outSecOff = it->second;
-  }
-  for (CStringInputSection *isec : inputs) {
-    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
-      if (!piece.live)
-        continue;
-      auto s = isec->getCachedHashStringRef(i);
-      auto it = tailMergeMap.find(s);
-      if (it == tailMergeMap.end())
-        continue;
-      const auto &[superString, tailOffset] = it->second;
-      assert(superString.val().ends_with(s.val()));
-      assert(!tailMergeMap.count(superString));
-      auto &outSecOff = stringOffsetMap[s];
-      outSecOff = stringOffsetMap.at(superString) + tailOffset;
-      piece.outSecOff = outSecOff;
-      assert(isAligned(strToAlignment.at(s), piece.outSecOff));
+    piece.outSecOff = it->second + tailMergeOffset;
+    if (mergeIt != tailMergeMap.end()) {
+      auto &tailMergedString = mergeIt->first;
+      stringOffsetMap[tailMergedString] = piece.outSecOff;
+      assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
     }
-    isec->isFinal = true;
   }
+  for (CStringInputSection *isec : inputs)
+    isec->isFinal = true;
 }
 
 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s
index 0a42b3d6fcff3..a4b15f26afff0 100644
--- a/lld/test/MachO/cstring-dedup.s
+++ b/lld/test/MachO/cstring-dedup.s
@@ -8,10 +8,11 @@
 # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
 # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
 
-## Make sure we only have 2 deduplicated strings in __cstring.
+## Make sure we only have 3 deduplicated strings in __cstring.
 # STR: Contents of (__TEXT,__cstring) section
 # STR: {{[[:xdigit:]]+}} foo
 # STR: {{[[:xdigit:]]+}} barbaz
+# STR: {{[[:xdigit:]]+}} {{$}}
 
 ## Make sure both symbol and section relocations point to the right thing.
 # CHECK:      Contents of (__DATA,ptrs) section
diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s
new file mode 100644
index 0000000000000..270dcc7562613
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge-objc.s
@@ -0,0 +1,144 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; Test that ObjC method names are tail merged and
+; ObjCSelRefsHelper::makeSelRef() still works correctly
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/a.o -o %t/a
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error
+
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings %t/a.o -o %t/nomerge
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error
+
+; CHECK: withBar:error:
+; NOMERGE: error:
+
+;--- a.mm
+__attribute__((objc_root_class))
+@interface Foo
+- (void)withBar:(int)bar error:(int)error;
+- (void)error:(int)error;
+@end
+
+@implementation Foo
+- (void)withBar:(int)bar error:(int)error {}
+- (void)error:(int)error {}
+@end
+
+void *_objc_empty_cache;
+void *_objc_empty_vtable;
+;--- gen
+clang -Oz -target arm64-apple-darwin a.mm -S -o -
+;--- a.s
+	.build_version macos, 11, 0
+	.section	__TEXT,__text,regular,pure_instructions
+	.p2align	2                               ; -- Begin function -[Foo withBar:error:]
+"-[Foo withBar:error:]":                ; @"\01-[Foo withBar:error:]"
+	.cfi_startproc
+; %bb.0:
+	ret
+	.cfi_endproc
+                                        ; -- End function
+	.p2align	2                               ; -- Begin function -[Foo error:]
+"-[Foo error:]":                        ; @"\01-[Foo error:]"
+	.cfi_startproc
+; %bb.0:
+	ret
+	.cfi_endproc
+                                        ; -- End function
+	.globl	__objc_empty_vtable             ; @_objc_empty_vtable
+.zerofill __DATA,__common,__objc_empty_vtable,8,3
+	.section	__DATA,__objc_data
+	.globl	_OBJC_CLASS_$_Foo               ; @"OBJC_CLASS_$_Foo"
+	.p2align	3, 0x0
+_OBJC_CLASS_$_Foo:
+	.quad	_OBJC_METACLASS_$_Foo
+	.quad	0
+	.quad	__objc_empty_cache
+	.quad	__objc_empty_vtable
+	.quad	__OBJC_CLASS_RO_$_Foo
+
+	.globl	_OBJC_METACLASS_$_Foo           ; @"OBJC_METACLASS_$_Foo"
+	.p2align	3, 0x0
+_OBJC_METACLASS_$_Foo:
+	.quad	_OBJC_METACLASS_$_Foo
+	.quad	_OBJC_CLASS_$_Foo
+	.quad	__objc_empty_cache
+	.quad	__objc_empty_vtable
+	.quad	__OBJC_METACLASS_RO_$_Foo
+
+	.section	__TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_:                     ; @OBJC_CLASS_NAME_
+	.asciz	"Foo"
+
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0                          ; @"_OBJC_METACLASS_RO_$_Foo"
+__OBJC_METACLASS_RO_$_Foo:
+	.long	3                               ; 0x3
+	.long	40                              ; 0x28
+	.long	40                              ; 0x28
+	.space	4
+	.quad	0
+	.quad	l_OBJC_CLASS_NAME_
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.section	__TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_:                  ; @OBJC_METH_VAR_NAME_
+	.asciz	"withBar:error:"
+
+	.section	__TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_:                  ; @OBJC_METH_VAR_TYPE_
+	.asciz	"v24 at 0:8i16i20"
+
+	.section	__TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_.1:                ; @OBJC_METH_VAR_NAME_.1
+	.asciz	"error:"
+
+	.section	__TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_.2:                ; @OBJC_METH_VAR_TYPE_.2
+	.asciz	"v20 at 0:8i16"
+
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0                          ; @"_OBJC_$_INSTANCE_METHODS_Foo"
+__OBJC_$_INSTANCE_METHODS_Foo:
+	.long	24                              ; 0x18
+	.long	2                               ; 0x2
+	.quad	l_OBJC_METH_VAR_NAME_
+	.quad	l_OBJC_METH_VAR_TYPE_
+	.quad	"-[Foo withBar:error:]"
+	.quad	l_OBJC_METH_VAR_NAME_.1
+	.quad	l_OBJC_METH_VAR_TYPE_.2
+	.quad	"-[Foo error:]"
+
+	.p2align	3, 0x0                          ; @"_OBJC_CLASS_RO_$_Foo"
+__OBJC_CLASS_RO_$_Foo:
+	.long	2                               ; 0x2
+	.long	0                               ; 0x0
+	.long	0                               ; 0x0
+	.space	4
+	.quad	0
+	.quad	l_OBJC_CLASS_NAME_
+	.quad	__OBJC_$_INSTANCE_METHODS_Foo
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.globl	__objc_empty_cache              ; @_objc_empty_cache
+.zerofill __DATA,__common,__objc_empty_cache,8,3
+	.section	__DATA,__objc_classlist,regular,no_dead_strip
+	.p2align	3, 0x0                          ; @"OBJC_LABEL_CLASS_$"
+l_OBJC_LABEL_CLASS_$:
+	.quad	_OBJC_CLASS_$_Foo
+
+	.section	__DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+	.long	0
+	.long	64
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s
index 83d2810a78139..cf780b0fc6b90 100644
--- a/lld/test/MachO/cstring-tailmerge.s
+++ b/lld/test/MachO/cstring-tailmerge.s
@@ -1,85 +1,85 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
 
-# RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
-# RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
-# RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
+; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
+; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
+; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
 
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o
 
-# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1
-# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
 
-# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2
-# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
 
-# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4
-# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
 
-# CHECK: Contents of (__TEXT,__cstring) section
-# CHECK: [[#%.16x,START:]] get awkward offset{{$}}
+; CHECK: Contents of (__TEXT,__cstring) section
+; CHECK: [[#%.16x,START:]] get awkward offset{{$}}
 
-# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
-# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
+; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
+; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
 
-# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
-# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
-# ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
-# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
+; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
 
-# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
-# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
-# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
-# ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
-# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
+; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
 
-# CHECK: SYMBOL TABLE:
+; CHECK: SYMBOL TABLE:
 
-# ALIGN1: [[#%.16x,START+19]] l     O __TEXT,__cstring _myotherlongstr
-# ALIGN1: [[#%.16x,START+21]] l     O __TEXT,__cstring _otherlongstr
-# ALIGN1: [[#%.16x,START+26]] l     O __TEXT,__cstring _longstr
-# ALIGN1: [[#%.16x,START+34]] l     O __TEXT,__cstring _otherstr
-# ALIGN1: [[#%.16x,START+39]] l     O __TEXT,__cstring _str
+; ALIGN1: [[#%.16x,START+19]] l     O __TEXT,__cstring _myotherlongstr
+; ALIGN1: [[#%.16x,START+21]] l     O __TEXT,__cstring _otherlongstr
+; ALIGN1: [[#%.16x,START+26]] l     O __TEXT,__cstring _longstr
+; ALIGN1: [[#%.16x,START+34]] l     O __TEXT,__cstring _otherstr
+; ALIGN1: [[#%.16x,START+39]] l     O __TEXT,__cstring _str
 
-# ALIGN2: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
-# ALIGN2: [[#%.16x,START+20+2]] l     O __TEXT,__cstring _otherlongstr
-# ALIGN2: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _longstr
-# ALIGN2: [[#%.16x,START+20+16+8]] l     O __TEXT,__cstring _otherstr
-# ALIGN2: [[#%.16x,START+20+16+8+10]] l     O __TEXT,__cstring _str
+; ALIGN2: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+; ALIGN2: [[#%.16x,START+20+2]] l     O __TEXT,__cstring _otherlongstr
+; ALIGN2: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _longstr
+; ALIGN2: [[#%.16x,START+20+16+8]] l     O __TEXT,__cstring _otherstr
+; ALIGN2: [[#%.16x,START+20+16+8+10]] l     O __TEXT,__cstring _str
 
-# ALIGN4: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
-# ALIGN4: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _otherlongstr
-# ALIGN4: [[#%.16x,START+20+16+16]] l     O __TEXT,__cstring _longstr
-# ALIGN4: [[#%.16x,START+20+16+16+8]] l     O __TEXT,__cstring _otherstr
-# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l     O __TEXT,__cstring _str
+; ALIGN4: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+; ALIGN4: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _otherlongstr
+; ALIGN4: [[#%.16x,START+20+16+16]] l     O __TEXT,__cstring _longstr
+; ALIGN4: [[#%.16x,START+20+16+16+8]] l     O __TEXT,__cstring _otherstr
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l     O __TEXT,__cstring _str
 
-#--- first.s
+;--- first.s
 .cstring
 .p2align 2
-.asciz "get awkward offset"  # length = 19
+.asciz "get awkward offset"  ; length = 19
 
-#--- align.s.template
+;--- align.s.template
 .cstring
 
 .p2align <ALIGN>
   _myotherlongstr:
-.asciz "myotherlongstr"      # length = 15
+.asciz "myotherlongstr"      ; length = 15
 
 .p2align <ALIGN>
   _otherlongstr:
-.asciz   "otherlongstr"      # length = 13, tail offset = 2
+.asciz   "otherlongstr"      ; length = 13, tail offset = 2
 
 .p2align <ALIGN>
   _longstr:
-.asciz        "longstr"      # length = 8, tail offset = 7
+.asciz        "longstr"      ; length = 8, tail offset = 7
 
 .p2align <ALIGN>
   _otherstr:
-.asciz       "otherstr"      # length = 9
+.asciz       "otherstr"      ; length = 9
 
 .p2align <ALIGN>
   _str:
-.asciz            "str"      # length = 4, tail offset = 5
+.asciz            "str"      ; length = 4, tail offset = 5
diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s
new file mode 100644
index 0000000000000..4f177ccf1c14f
--- /dev/null
+++ b/lld/test/MachO/order-file-cstring-tailmerge.s
@@ -0,0 +1,56 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s
+; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED
+
+; CHECK: _str2
+; CHECK: _str1
+; CHECK: _superstr2
+; CHECK: _superstr3
+; CHECK: _superstr1
+; CHECK: _str3
+
+; str1 has a higher priority than superstr1, so str1 must be ordered before
+; str3, even though superstr3 is before superstr1 in the orderfile.
+
+; MERGED: _superstr2
+; MERGED: _str2
+; MERGED: _superstr1
+; MERGED: _str1
+; MERGED: _superstr3
+; MERGED: _str3
+
+;--- a.s
+.cstring
+  _superstr1:
+.asciz "superstr1"
+  _str1:
+.asciz "str1"
+  _superstr2:
+.asciz "superstr2"
+  _str2:
+.asciz "str2"
+  _superstr3:
+.asciz "superstr3"
+  _str3:
+.asciz "str3"
+
+; TODO: We could use update_test_body.py to generate the hashes for the
+; orderfile. Unfortunately, it seems that LLVM has a different hash
+; implementation than the xxh64sum tool. See
+; DeduplicatedCStringSection::getStringOffset() for hash details.
+;
+; while IFS="" read -r line; do
+;     echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}'
+;     echo " # $line"
+; done < orderfile.txt.template
+
+;--- orderfile.txt
+CSTR;1236462241 # str2
+CSTR;1526669509 # str1
+CSTR;1563550684 # superstr2
+CSTR;1044337806 # superstr3
+CSTR;262417687  # superstr1
+CSTR;717161398  # str3

>From 4fbcb74168cfd7fc1b85417d3440b0ebdd12650a Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Wed, 1 Oct 2025 14:24:51 -0700
Subject: [PATCH 9/9] fix release notes

---
 lld/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index eae632ff9e796..29db1cdf9e9c4 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -46,7 +46,7 @@ MachO Improvements
 
 * ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
   (`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
-* cstrings sections are now tail merged.
+* ``--tail-merge-strings`` enables tail merging of cstring literals.
   (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
 
 WebAssembly Improvements



More information about the llvm-commits mailing list