[llvm-branch-commits] [lld] [lld][MachO] Tail merge strings (PR #161262)

Mon Sep 29 12:22:56 PDT 2025

https://github.com/ellishg created https://github.com/llvm/llvm-project/pull/161262


TODO: Measure size win

TODO: Measure performance

Depends on https://github.com/llvm/llvm-project/pull/161253.

>From 4742cca2ec71b6651f76c12b9d4aea2706cec02c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 29 Sep 2025 12:21:49 -0700
Subject: [PATCH] [lld][MachO] Tail merge strings

---
 lld/MachO/SyntheticSections.cpp    | 58 +++++++++++++++++++-
 lld/test/MachO/cstring-dedup.s     |  3 +-
 lld/test/MachO/cstring-tailmerge.s | 85 ++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 3 deletions(-)
 create mode 100644 lld/test/MachO/cstring-tailmerge.s

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 903ba78a27c75..460a0b5a16ab0 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() {
 void DeduplicatedCStringSection::finalizeContents() {
   // Find the largest alignment required for each string.
   DenseMap<CachedHashStringRef, Align> strToAlignment;
+  std::vector<CachedHashStringRef> deduplicatedStrs;
   for (const CStringInputSection *isec : inputs) {
     for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
       if (!piece.live)
@@ -1754,17 +1755,57 @@ void DeduplicatedCStringSection::finalizeContents() {
       assert(isec->align != 0);
       auto align = getStringPieceAlignment(isec, piece);
       auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+      if (wasInserted)
+        deduplicatedStrs.push_back(s);
       if (!wasInserted && it->second < align)
         it->second = align;
     }
   }
 
+  // Like lexigraphical sort, except we read strings in reverse and take the
+  // longest string first
+  // TODO: We could improve performance by implementing our own sort that avoids
+  // comparing characters we know to be the same. See
+  // StringTableBuilder::multikeySort() for details
+  llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
+    for (const auto &[leftChar, rightChar] :
+         llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
+      if (leftChar == rightChar)
+        continue;
+      return leftChar < rightChar;
+    }
+    return left.size() > right.size();
+  });
+  std::optional<CachedHashStringRef> mergeCandidate;
+  DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
+      tailMergeMap;
+  for (auto &s : deduplicatedStrs) {
+    if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
+      mergeCandidate = s;
+      continue;
+    }
+    uint64_t tailOffset = mergeCandidate->size() - s.size();
+    // TODO: If the tail offset is incompatible with this string's alignment, we
+    // might be able to find another superstring with a compatible tail offset.
+    // The difficulty is how to do this efficiently
+    const auto &align = strToAlignment.at(s);
+    if (!isAligned(align, tailOffset))
+      continue;
+    auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
+    if (align > mergeCandidateAlign)
+      mergeCandidateAlign = align;
+    tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset);
+  }
+
   // Sort the strings for performance and compression size win, and then
   // assign an offset for each string and save it to the corresponding
   // StringPieces for easy access.
   for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
     auto &piece = isec->pieces[i];
     auto s = isec->getCachedHashStringRef(i);
+    // Skip tail merged strings until their superstring offsets are resolved
+    if (tailMergeMap.count(s))
+      continue;
     auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
     if (wasInserted) {
       // Avoid computing the offset until we are sure we will need to
@@ -1776,8 +1817,23 @@ void DeduplicatedCStringSection::finalizeContents() {
     // only need to assign the offset.
     piece.outSecOff = it->second;
   }
-  for (CStringInputSection *isec : inputs)
+  for (CStringInputSection *isec : inputs) {
+    for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+      if (!piece.live)
+        continue;
+      auto s = isec->getCachedHashStringRef(i);
+      auto it = tailMergeMap.find(s);
+      if (it == tailMergeMap.end())
+        continue;
+      const auto &[superString, tailOffset] = it->second;
+      assert(!tailMergeMap.count(superString));
+      auto &outSecOff = stringOffsetMap[s];
+      outSecOff = stringOffsetMap.at(superString) + tailOffset;
+      piece.outSecOff = outSecOff;
+      assert(isAligned(strToAlignment.at(s), piece.outSecOff));
+    }
     isec->isFinal = true;
+  }
 }
 
 void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s
index a4b15f26afff0..0a42b3d6fcff3 100644
--- a/lld/test/MachO/cstring-dedup.s
+++ b/lld/test/MachO/cstring-dedup.s
@@ -8,11 +8,10 @@
 # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
 # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
 
-## Make sure we only have 3 deduplicated strings in __cstring.
+## Make sure we only have 2 deduplicated strings in __cstring.
 # STR: Contents of (__TEXT,__cstring) section
 # STR: {{[[:xdigit:]]+}} foo
 # STR: {{[[:xdigit:]]+}} barbaz
-# STR: {{[[:xdigit:]]+}} {{$}}
 
 ## Make sure both symbol and section relocations point to the right thing.
 # CHECK:      Contents of (__DATA,ptrs) section
diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s
new file mode 100644
index 0000000000000..83d2810a78139
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge.s
@@ -0,0 +1,85 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
+# RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
+# RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
+
+# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
+
+# CHECK: Contents of (__TEXT,__cstring) section
+# CHECK: [[#%.16x,START:]] get awkward offset{{$}}
+
+# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
+# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
+
+# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
+# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
+
+# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
+# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
+
+# CHECK: SYMBOL TABLE:
+
+# ALIGN1: [[#%.16x,START+19]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN1: [[#%.16x,START+21]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN1: [[#%.16x,START+26]] l     O __TEXT,__cstring _longstr
+# ALIGN1: [[#%.16x,START+34]] l     O __TEXT,__cstring _otherstr
+# ALIGN1: [[#%.16x,START+39]] l     O __TEXT,__cstring _str
+
+# ALIGN2: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN2: [[#%.16x,START+20+2]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN2: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _longstr
+# ALIGN2: [[#%.16x,START+20+16+8]] l     O __TEXT,__cstring _otherstr
+# ALIGN2: [[#%.16x,START+20+16+8+10]] l     O __TEXT,__cstring _str
+
+# ALIGN4: [[#%.16x,START+20]] l     O __TEXT,__cstring _myotherlongstr
+# ALIGN4: [[#%.16x,START+20+16]] l     O __TEXT,__cstring _otherlongstr
+# ALIGN4: [[#%.16x,START+20+16+16]] l     O __TEXT,__cstring _longstr
+# ALIGN4: [[#%.16x,START+20+16+16+8]] l     O __TEXT,__cstring _otherstr
+# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l     O __TEXT,__cstring _str
+
+#--- first.s
+.cstring
+.p2align 2
+.asciz "get awkward offset"  # length = 19
+
+#--- align.s.template
+.cstring
+
+.p2align <ALIGN>
+  _myotherlongstr:
+.asciz "myotherlongstr"      # length = 15
+
+.p2align <ALIGN>
+  _otherlongstr:
+.asciz   "otherlongstr"      # length = 13, tail offset = 2
+
+.p2align <ALIGN>
+  _longstr:
+.asciz        "longstr"      # length = 8, tail offset = 7
+
+.p2align <ALIGN>
+  _otherstr:
+.asciz       "otherstr"      # length = 9
+
+.p2align <ALIGN>
+  _str:
+.asciz            "str"      # length = 4, tail offset = 5