[lld] [lld-macho] Implement symbol string deduplication (PR #123874)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 07:38:46 PST 2025
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/123874
>From dc3fbc06bcbd42aa21d7348524be41c2afa1e9d0 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 17 Jan 2025 19:54:17 -0800
Subject: [PATCH 1/4] [lld-macho] Implement symbol string deduplication
---
lld/MachO/Config.h | 2 ++
lld/MachO/Driver.cpp | 2 ++
lld/MachO/Options.td | 5 +++++
lld/MachO/SyntheticSections.cpp | 19 +++++++++++++++++--
lld/MachO/SyntheticSections.h | 1 +
lld/test/MachO/cfstring-dedup.s | 8 ++++++++
6 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index d41ca5382c692a..edc1246acd3e42 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -256,6 +256,8 @@ struct Configuration {
llvm::MachO::PlatformType platform() const {
return platformInfo.target.Platform;
}
+
+ bool deduplicateSymbolStrings = true;
};
extern std::unique_ptr<Configuration> config;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 31630ba7d69de2..9c63ab0739a8a5 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1806,6 +1806,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->keepICFStabs = args.hasArg(OPT_keep_icf_stabs);
config->dedupStrings =
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
+ config->deduplicateSymbolStrings =
+ !args.hasArg(OPT_no_deduplicate_symbol_strings);
config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
config->warnDylibInstallName = args.hasFlag(
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4c89f96c3ebaad..9001e85582c124 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1476,3 +1476,8 @@ def no_warn_duplicate_libraries : Flag<["-"], "no_warn_duplicate_libraries">,
HelpText<"Do not warn if the input contains duplicate library options.">,
Flags<[HelpHidden]>,
Group<grp_ignored_silently>;
+
+// Add this with the other flags in the rare options group
+def no_deduplicate_symbol_strings : Flag<["-"], "no-deduplicate-symbol-strings">,
+ HelpText<"Do not deduplicate strings in the symbol string table. Might result in larger binaries but slightly faster link times.">,
+ Group<grp_rare>;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 417b7cf93efa7d..313860128bcafb 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1540,9 +1540,24 @@ StringTableSection::StringTableSection()
: LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}
uint32_t StringTableSection::addString(StringRef str) {
+ // If deduplication is disabled, just add the string
+ if (!config->deduplicateSymbolStrings) {
+ uint32_t strx = size;
+ strings.push_back(str);
+ size += str.size() + 1; // +1 for null terminator
+ return strx;
+ }
+
+ // Deduplicate strings
+ llvm::CachedHashStringRef hashedStr(str);
+ auto it = stringMap.find(hashedStr);
+ if (it != stringMap.end())
+ return it->second;
+
uint32_t strx = size;
- strings.push_back(str); // TODO: consider deduplicating strings
- size += str.size() + 1; // account for null terminator
+ stringMap[hashedStr] = strx;
+ strings.push_back(str);
+ size += str.size() + 1;
return strx;
}
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index af99f22788d6e9..5796b0790c83a0 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -447,6 +447,7 @@ class StringTableSection final : public LinkEditSection {
// match its behavior here since some tools depend on it.
// Consequently, the empty string will be at index 1, not zero.
std::vector<StringRef> strings{" "};
+ llvm::DenseMap<llvm::CachedHashStringRef, uint32_t> stringMap;
size_t size = 2;
};
diff --git a/lld/test/MachO/cfstring-dedup.s b/lld/test/MachO/cfstring-dedup.s
index fb121cde3e9585..7e1772600b427f 100644
--- a/lld/test/MachO/cfstring-dedup.s
+++ b/lld/test/MachO/cfstring-dedup.s
@@ -7,6 +7,14 @@
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo
# RUN: llvm-objdump --no-print-imm-hex --macho --rebase --bind --syms -d %t/foo | FileCheck %s --check-prefix=LITERALS
+# Check that string deduplication for symbol names is working
+# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo_no_dedup -no-deduplicate-symbol-strings
+# RUN: count1=$((`llvm-strings %t/foo | grep _named_cfstring | wc -l`))
+# RUN: test $count1 -eq 1
+# RUN: count2=$((`llvm-strings %t/foo_no_dedup | grep _named_cfstring | wc -l`))
+# RUN: test $count2 -eq 2
+
+
# CHECK: (__TEXT,__text) section
# CHECK-NEXT: _foo1:
# CHECK-NEXT: _foo2:
>From d09f3d73de7bfef2453d1c726109564f4fba4719 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 22 Jan 2025 09:29:27 -0800
Subject: [PATCH 2/4] Address Comments #1 & fix test for Windows
---
lld/MachO/SyntheticSections.cpp | 29 +++++++++++------------------
lld/test/MachO/cfstring-dedup.s | 11 +++++++----
2 files changed, 18 insertions(+), 22 deletions(-)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 313860128bcafb..9ad3136372112c 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -791,7 +791,7 @@ void StubHelperSection::writeTo(uint8_t *buf) const {
void StubHelperSection::setUp() {
Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr,
- /*isWeakRef=*/false);
+ /*isWeakRef=*/false);
if (auto *undefined = dyn_cast<Undefined>(binder))
treatUndefinedSymbol(*undefined,
"lazy binding (normally in libSystem.dylib)");
@@ -1182,7 +1182,7 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
StabsEntry stab(N_OSO);
stab.sect = target->cpuSubtype;
SmallString<261> path(!file->archiveName.empty() ? file->archiveName
- : file->getName());
+ : file->getName());
std::error_code ec = sys::fs::make_absolute(path);
if (ec)
fatal("failed to get absolute path for " + path);
@@ -1540,24 +1540,17 @@ StringTableSection::StringTableSection()
: LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}
uint32_t StringTableSection::addString(StringRef str) {
- // If deduplication is disabled, just add the string
- if (!config->deduplicateSymbolStrings) {
- uint32_t strx = size;
- strings.push_back(str);
- size += str.size() + 1; // +1 for null terminator
- return strx;
- }
-
- // Deduplicate strings
- llvm::CachedHashStringRef hashedStr(str);
- auto it = stringMap.find(hashedStr);
- if (it != stringMap.end())
- return it->second;
-
uint32_t strx = size;
- stringMap[hashedStr] = strx;
+ if (config->deduplicateSymbolStrings) {
+ // Deduplicate strings
+ llvm::CachedHashStringRef hashedStr(str);
+ auto [it, inserted] = stringMap.try_emplace(hashedStr, strx);
+ if (!inserted)
+ return it->second;
+ }
+
strings.push_back(str);
- size += str.size() + 1;
+ size += str.size() + 1; // +1 for null terminator
return strx;
}
diff --git a/lld/test/MachO/cfstring-dedup.s b/lld/test/MachO/cfstring-dedup.s
index 7e1772600b427f..4f490ba4380e13 100644
--- a/lld/test/MachO/cfstring-dedup.s
+++ b/lld/test/MachO/cfstring-dedup.s
@@ -9,10 +9,13 @@
# Check that string deduplication for symbol names is working
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo_no_dedup -no-deduplicate-symbol-strings
-# RUN: count1=$((`llvm-strings %t/foo | grep _named_cfstring | wc -l`))
-# RUN: test $count1 -eq 1
-# RUN: count2=$((`llvm-strings %t/foo_no_dedup | grep _named_cfstring | wc -l`))
-# RUN: test $count2 -eq 2
+# RUN: llvm-strings %t/foo | FileCheck %s --check-prefix=CHECK-DEDUP
+# RUN: llvm-strings %t/foo_no_dedup | FileCheck %s --check-prefix=CHECK-NO-DEDUP
+# CHECK-DEDUP: _named_cfstring
+# CHECK-DEDUP-NOT: _named_cfstring
+# CHECK-NO-DEDUP: _named_cfstring
+# CHECK-NO-DEDUP: _named_cfstring
+# CHECK-NO-DEDUP-NOT: _named_cfstring
# CHECK: (__TEXT,__text) section
>From 7cb0fc4b182d3107a771fd04d6a47d057c90159f Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 22 Jan 2025 09:47:29 -0800
Subject: [PATCH 3/4] Fix formatting
---
lld/MachO/SyntheticSections.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 9ad3136372112c..f45e5bb071c575 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -791,7 +791,7 @@ void StubHelperSection::writeTo(uint8_t *buf) const {
void StubHelperSection::setUp() {
Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr,
- /*isWeakRef=*/false);
+ /*isWeakRef=*/false);
if (auto *undefined = dyn_cast<Undefined>(binder))
treatUndefinedSymbol(*undefined,
"lazy binding (normally in libSystem.dylib)");
@@ -1182,7 +1182,7 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
StabsEntry stab(N_OSO);
stab.sect = target->cpuSubtype;
SmallString<261> path(!file->archiveName.empty() ? file->archiveName
- : file->getName());
+ : file->getName());
std::error_code ec = sys::fs::make_absolute(path);
if (ec)
fatal("failed to get absolute path for " + path);
@@ -1548,9 +1548,9 @@ uint32_t StringTableSection::addString(StringRef str) {
if (!inserted)
return it->second;
}
-
+
strings.push_back(str);
- size += str.size() + 1; // +1 for null terminator
+ size += str.size() + 1; // account for null terminator
return strx;
}
>From 516976b194ca6d13a48358f41932160ee23ec958 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 23 Jan 2025 07:38:01 -0800
Subject: [PATCH 4/4] Address Feedback Nr.2
---
lld/MachO/Config.h | 3 +--
lld/MachO/Driver.cpp | 3 +--
lld/MachO/SyntheticSections.cpp | 3 +--
3 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index edc1246acd3e42..f8dcc84e4ee1ba 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -143,6 +143,7 @@ struct Configuration {
bool timeTraceEnabled = false;
bool dataConst = false;
bool dedupStrings = true;
+ bool dedupSymbolStrings = true;
bool deadStripDuplicates = false;
bool omitDebugInfo = false;
bool warnDylibInstallName = false;
@@ -256,8 +257,6 @@ struct Configuration {
llvm::MachO::PlatformType platform() const {
return platformInfo.target.Platform;
}
-
- bool deduplicateSymbolStrings = true;
};
extern std::unique_ptr<Configuration> config;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9c63ab0739a8a5..4f6c9b4ddc7984 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1806,8 +1806,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->keepICFStabs = args.hasArg(OPT_keep_icf_stabs);
config->dedupStrings =
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
- config->deduplicateSymbolStrings =
- !args.hasArg(OPT_no_deduplicate_symbol_strings);
+ config->dedupSymbolStrings = !args.hasArg(OPT_no_deduplicate_symbol_strings);
config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
config->warnDylibInstallName = args.hasFlag(
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index f45e5bb071c575..99a46bc1508330 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1541,8 +1541,7 @@ StringTableSection::StringTableSection()
uint32_t StringTableSection::addString(StringRef str) {
uint32_t strx = size;
- if (config->deduplicateSymbolStrings) {
- // Deduplicate strings
+ if (config->dedupSymbolStrings) {
llvm::CachedHashStringRef hashedStr(str);
auto [it, inserted] = stringMap.try_emplace(hashedStr, strx);
if (!inserted)
More information about the llvm-commits
mailing list