[lld] d0e9890 - [lld][MachO] Tail merge strings (#161262)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 3 09:38:17 PDT 2025
Author: Ellis Hoag
Date: 2025-10-03T16:38:10Z
New Revision: d0e98909d28be377408b1e52fa35423a2236036c
URL: https://github.com/llvm/llvm-project/commit/d0e98909d28be377408b1e52fa35423a2236036c
DIFF: https://github.com/llvm/llvm-project/commit/d0e98909d28be377408b1e52fa35423a2236036c.diff
LOG: [lld][MachO] Tail merge strings (#161262)
Add the flag `--tail-merge-strings` to enable tail merging of cstrings.
For example, if we have strings `mystring\0` and `ring\0`, we could
place `mystring\0` at address `0x1000` and `ring\0` at address `0x1004`
and have them share the same underlying data.
It turns out that many ObjC method names can be tail merged. For
example, `error:` and `doFoo:error:`. On a large iOS binary, we saw
nearly a 15% size improvement in the `__TEXT__objc_methname` section and
negligible impact on link time.
```
$ bloaty --domain=vm merged.o.stripped -- base.o.stripped
VM SIZE
--------------
+95% +5.85Ki [__TEXT]
-2.4% -239Ki __TEXT,__cstring
-14.5% -710Ki __TEXT,__objc_methname
-1.0% -944Ki TOTAL
```
Tail merging for MachO was originally removed in
https://github.com/llvm/llvm-project/commit/7c269db779ff3950bac2e25ea78b14b4e2b8b247.
The previous implementation used `StringTableBuilder`, but that was
removed in
https://github.com/llvm/llvm-project/commit/4308f031cd0c679c539914608134b9c8046743b3
to ensure deduplicated strings are aligned correctly. This
implementation ensures that tail merged strings are also aligned
correctly.
Special thanks to nocchijiang for pointing this out in
https://github.com/llvm/llvm-project/pull/158720#issuecomment-3310416030.
Depends on https://github.com/llvm/llvm-project/pull/161253.
Added:
lld/test/MachO/cstring-tailmerge-objc.s
lld/test/MachO/cstring-tailmerge.s
lld/test/MachO/order-file-cstring-tailmerge.s
Modified:
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/Options.td
lld/MachO/SyntheticSections.cpp
lld/docs/ReleaseNotes.rst
Removed:
################################################################################
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 51b1363d87615..a2ca5770bf952 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -223,6 +223,7 @@ struct Configuration {
bool warnThinArchiveMissingMembers;
bool disableVerify;
bool separateCstringLiteralSections;
+ bool tailMergeStrings;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7ce987e400a24..94f441b7643a7 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->separateCstringLiteralSections =
args.hasFlag(OPT_separate_cstring_literal_sections,
OPT_no_separate_cstring_literal_sections, false);
+ config->tailMergeStrings =
+ args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4eeb8fbe11121..be1a1cc2963d9 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections
"Emit all cstring literals into the __cstring section. As a special "
"case, the __objc_methname section will still be emitted. (default)">,
Group<grp_rare>;
+defm tail_merge_strings
+ : BB<"tail-merge-strings", "Enable string tail merging",
+ "Disable string tail merging to improve link-time performance">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 903ba78a27c75..187cccbe90dbc 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1746,6 +1746,8 @@ void CStringSection::finalizeContents() {
void DeduplicatedCStringSection::finalizeContents() {
// Find the largest alignment required for each string.
DenseMap<CachedHashStringRef, Align> strToAlignment;
+ // Used for tail merging only
+ std::vector<CachedHashStringRef> deduplicatedStrs;
for (const CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
@@ -1754,17 +1756,66 @@ void DeduplicatedCStringSection::finalizeContents() {
assert(isec->align != 0);
auto align = getStringPieceAlignment(isec, piece);
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+ if (config->tailMergeStrings && wasInserted)
+ deduplicatedStrs.push_back(s);
if (!wasInserted && it->second < align)
it->second = align;
}
}
+ // Like lexigraphical sort, except we read strings in reverse and take the
+ // longest string first
+ // TODO: We could improve performance by implementing our own sort that avoids
+ // comparing characters we know to be the same. See
+ // StringTableBuilder::multikeySort() for details
+ llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
+ for (const auto &[leftChar, rightChar] :
+ llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
+ if (leftChar == rightChar)
+ continue;
+ return leftChar < rightChar;
+ }
+ return left.size() > right.size();
+ });
+ std::optional<CachedHashStringRef> mergeCandidate;
+ DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
+ tailMergeMap;
+ for (auto &s : deduplicatedStrs) {
+ if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
+ mergeCandidate = s;
+ continue;
+ }
+ uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
+ // TODO: If the tail offset is incompatible with this string's alignment, we
+ // might be able to find another superstring with a compatible tail offset.
+ // The difficulty is how to do this efficiently
+ const auto &align = strToAlignment.at(s);
+ if (!isAligned(align, tailMergeOffset))
+ continue;
+ auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
+ if (align > mergeCandidateAlign)
+ mergeCandidateAlign = align;
+ tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
+ }
+
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
+ // Any string can be tail merged with itself with an offset of zero
+ uint64_t tailMergeOffset = 0;
+ auto mergeIt =
+ config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
+ if (mergeIt != tailMergeMap.end()) {
+ auto &[superString, offset] = mergeIt->second;
+ // s can be tail merged with superString. Do not layout s. Instead layout
+ // superString if we haven't already
+ assert(superString.val().ends_with(s.val()));
+ s = superString;
+ tailMergeOffset = offset;
+ }
auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
if (wasInserted) {
// Avoid computing the offset until we are sure we will need to
@@ -1772,9 +1823,12 @@ void DeduplicatedCStringSection::finalizeContents() {
it->second = offset;
size = offset + s.size() + 1; // account for null terminator
}
- // If the string was already in stringOffsetMap, it is a duplicate and we
- // only need to assign the offset.
- piece.outSecOff = it->second;
+ piece.outSecOff = it->second + tailMergeOffset;
+ if (mergeIt != tailMergeMap.end()) {
+ auto &tailMergedString = mergeIt->first;
+ stringOffsetMap[tailMergedString] = piece.outSecOff;
+ assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
+ }
}
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 566dde6e08115..29db1cdf9e9c4 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -46,6 +46,8 @@ MachO Improvements
* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
(`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
+* ``--tail-merge-strings`` enables tail merging of cstring literals.
+ (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
WebAssembly Improvements
------------------------
diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s
new file mode 100644
index 0000000000000..46b2bbf9dcd9a
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge-objc.s
@@ -0,0 +1,144 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; Test that ObjC method names are tail merged and
+; ObjCSelRefsHelper::makeSelRef() still works correctly
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error
+
+; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error
+
+; CHECK: withBar:error:
+; NOMERGE: error:
+
+;--- a.mm
+__attribute__((objc_root_class))
+@interface Foo
+- (void)withBar:(int)bar error:(int)error;
+- (void)error:(int)error;
+@end
+
+@implementation Foo
+- (void)withBar:(int)bar error:(int)error {}
+- (void)error:(int)error {}
+@end
+
+void *_objc_empty_cache;
+void *_objc_empty_vtable;
+;--- gen
+clang -Oz -target arm64-apple-darwin a.mm -S -o -
+;--- a.s
+ .build_version macos, 11, 0
+ .section __TEXT,__text,regular,pure_instructions
+ .p2align 2 ; -- Begin function -[Foo withBar:error:]
+"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]"
+ .cfi_startproc
+; %bb.0:
+ ret
+ .cfi_endproc
+ ; -- End function
+ .p2align 2 ; -- Begin function -[Foo error:]
+"-[Foo error:]": ; @"\01-[Foo error:]"
+ .cfi_startproc
+; %bb.0:
+ ret
+ .cfi_endproc
+ ; -- End function
+ .globl __objc_empty_vtable ; @_objc_empty_vtable
+.zerofill __DATA,__common,__objc_empty_vtable,8,3
+ .section __DATA,__objc_data
+ .globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo"
+ .p2align 3, 0x0
+_OBJC_CLASS_$_Foo:
+ .quad _OBJC_METACLASS_$_Foo
+ .quad 0
+ .quad __objc_empty_cache
+ .quad __objc_empty_vtable
+ .quad __OBJC_CLASS_RO_$_Foo
+
+ .globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo"
+ .p2align 3, 0x0
+_OBJC_METACLASS_$_Foo:
+ .quad _OBJC_METACLASS_$_Foo
+ .quad _OBJC_CLASS_$_Foo
+ .quad __objc_empty_cache
+ .quad __objc_empty_vtable
+ .quad __OBJC_METACLASS_RO_$_Foo
+
+ .section __TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_
+ .asciz "Foo"
+
+ .section __DATA,__objc_const
+ .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo"
+__OBJC_METACLASS_RO_$_Foo:
+ .long 3 ; 0x3
+ .long 40 ; 0x28
+ .long 40 ; 0x28
+ .space 4
+ .quad 0
+ .quad l_OBJC_CLASS_NAME_
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+
+ .section __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_
+ .asciz "withBar:error:"
+
+ .section __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_
+ .asciz "v24@0:8i16i20"
+
+ .section __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1
+ .asciz "error:"
+
+ .section __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2
+ .asciz "v20@0:8i16"
+
+ .section __DATA,__objc_const
+ .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo"
+__OBJC_$_INSTANCE_METHODS_Foo:
+ .long 24 ; 0x18
+ .long 2 ; 0x2
+ .quad l_OBJC_METH_VAR_NAME_
+ .quad l_OBJC_METH_VAR_TYPE_
+ .quad "-[Foo withBar:error:]"
+ .quad l_OBJC_METH_VAR_NAME_.1
+ .quad l_OBJC_METH_VAR_TYPE_.2
+ .quad "-[Foo error:]"
+
+ .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo"
+__OBJC_CLASS_RO_$_Foo:
+ .long 2 ; 0x2
+ .long 0 ; 0x0
+ .long 0 ; 0x0
+ .space 4
+ .quad 0
+ .quad l_OBJC_CLASS_NAME_
+ .quad __OBJC_$_INSTANCE_METHODS_Foo
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+
+ .globl __objc_empty_cache ; @_objc_empty_cache
+.zerofill __DATA,__common,__objc_empty_cache,8,3
+ .section __DATA,__objc_classlist,regular,no_dead_strip
+ .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$"
+l_OBJC_LABEL_CLASS_$:
+ .quad _OBJC_CLASS_$_Foo
+
+ .section __DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+ .long 0
+ .long 64
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s
new file mode 100644
index 0000000000000..740f971eb4bb8
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge.s
@@ -0,0 +1,85 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
+; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
+; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
+
+; CHECK: Contents of (__TEXT,__cstring) section
+; CHECK: [[#%.16x,START:]] get awkward offset{{$}}
+
+; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
+; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
+
+; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
+
+; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
+
+; CHECK: SYMBOL TABLE:
+
+; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr
+; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr
+; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr
+; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str
+
+; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr
+; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr
+; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr
+; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str
+
+; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr
+; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr
+; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str
+
+;--- first.s
+.cstring
+.p2align 2
+.asciz "get awkward offset" ; length = 19
+
+;--- align.s.template
+.cstring
+
+.p2align <ALIGN>
+ _myotherlongstr:
+.asciz "myotherlongstr" ; length = 15
+
+.p2align <ALIGN>
+ _otherlongstr:
+.asciz "otherlongstr" ; length = 13, tail offset = 2
+
+.p2align <ALIGN>
+ _longstr:
+.asciz "longstr" ; length = 8, tail offset = 7
+
+.p2align <ALIGN>
+ _otherstr:
+.asciz "otherstr" ; length = 9
+
+.p2align <ALIGN>
+ _str:
+.asciz "str" ; length = 4, tail offset = 5
diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s
new file mode 100644
index 0000000000000..20a4d162c573a
--- /dev/null
+++ b/lld/test/MachO/order-file-cstring-tailmerge.s
@@ -0,0 +1,56 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED
+
+; CHECK: _str2
+; CHECK: _str1
+; CHECK: _superstr2
+; CHECK: _superstr3
+; CHECK: _superstr1
+; CHECK: _str3
+
+; str1 has a higher priority than superstr1, so str1 must be ordered before
+; str3, even though superstr1 is before superstr3 in the orderfile.
+
+; MERGED: _superstr2
+; MERGED: _str2
+; MERGED: _superstr1
+; MERGED: _str1
+; MERGED: _superstr3
+; MERGED: _str3
+
+;--- a.s
+.cstring
+ _superstr1:
+.asciz "superstr1"
+ _str1:
+.asciz "str1"
+ _superstr2:
+.asciz "superstr2"
+ _str2:
+.asciz "str2"
+ _superstr3:
+.asciz "superstr3"
+ _str3:
+.asciz "str3"
+
+; TODO: We could use update_test_body.py to generate the hashes for the
+; orderfile. Unfortunately, it seems that LLVM has a different hash
+; implementation than the xxh64sum tool. See
+; DeduplicatedCStringSection::getStringOffset() for hash details.
+;
+; while IFS="" read -r line; do
+; echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}'
+; echo " # $line"
+; done < orderfile.txt.template
+
+;--- orderfile.txt
+CSTR;1236462241 # str2
+CSTR;1526669509 # str1
+CSTR;1563550684 # superstr2
+CSTR;1044337806 # superstr3
+CSTR;262417687 # superstr1
+CSTR;717161398 # str3
More information about the llvm-commits
mailing list