[lld] 31760e8 - [lld-macho] `-exported_symbols` should hide symbols before LTO runs
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 14:55:56 PDT 2022
Author: Jez Ng
Date: 2022-07-28T17:55:49-04:00
New Revision: 31760e8189c969d2687c7fa8d67771ab185dd2fb
URL: https://github.com/llvm/llvm-project/commit/31760e8189c969d2687c7fa8d67771ab185dd2fb
DIFF: https://github.com/llvm/llvm-project/commit/31760e8189c969d2687c7fa8d67771ab185dd2fb.diff
LOG: [lld-macho] `-exported_symbols` should hide symbols before LTO runs
We were previously doing it after LTO, which did have the desired effect
of having the un-exported symbols marked as private extern in the final
output binary, but doing it before LTO creates more optimization
opportunities.
One observable difference is that LTO can now elide un-exported symbols
entirely, so they may not even be present as private externs in the
output.
This is also what ld64 implements.
Reviewed By: #lld-macho, thevinster
Differential Revision: https://reviews.llvm.org/D130429
Added:
lld/test/MachO/lto-explicit-exports.ll
Modified:
lld/MachO/Driver.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ce2d55bef456f..bdcd9d30a77e5 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -526,7 +526,7 @@ static void initLLVM() {
InitializeAllAsmParsers();
}
-static void compileBitcodeFiles() {
+static bool compileBitcodeFiles() {
TimeTraceScope timeScope("LTO");
auto *lto = make<BitcodeCompiler>();
for (InputFile *file : inputFiles)
@@ -534,8 +534,11 @@ static void compileBitcodeFiles() {
if (!file->lazy)
lto->add(*bitcodeFile);
- for (ObjFile *file : lto->compile())
+ std::vector<ObjFile *> compiled = lto->compile();
+ for (ObjFile *file : compiled)
inputFiles.insert(file);
+
+ return !compiled.empty();
}
// Replaces common symbols with defined symbols residing in __common sections.
@@ -1145,6 +1148,52 @@ static void referenceStubBinder() {
symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, /*isWeak=*/false);
}
+static void createAliases() {
+ for (const auto &pair : config->aliasedSymbols) {
+ if (const auto &sym = symtab->find(pair.first)) {
+ if (const auto &defined = dyn_cast<Defined>(sym)) {
+ symtab->aliasDefined(defined, pair.second);
+ continue;
+ }
+ }
+
+ warn("undefined base symbol '" + pair.first + "' for alias '" +
+ pair.second + "'\n");
+ }
+}
+
+static void handleExplicitExports() {
+ if (config->hasExplicitExports) {
+ parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
+ if (auto *defined = dyn_cast<Defined>(sym)) {
+ StringRef symbolName = defined->getName();
+ if (config->exportedSymbols.match(symbolName)) {
+ if (defined->privateExtern) {
+ if (defined->weakDefCanBeHidden) {
+ // weak_def_can_be_hidden symbols behave similarly to
+ // private_extern symbols in most cases, except for when
+ // it is explicitly exported.
+ // The former can be exported but the latter cannot.
+ defined->privateExtern = false;
+ } else {
+ warn("cannot export hidden symbol " + toString(*defined) +
+ "\n>>> defined in " + toString(defined->getFile()));
+ }
+ }
+ } else {
+ defined->privateExtern = true;
+ }
+ }
+ });
+ } else if (!config->unexportedSymbols.empty()) {
+ parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
+ if (auto *defined = dyn_cast<Defined>(sym))
+ if (config->unexportedSymbols.match(defined->getName()))
+ defined->privateExtern = true;
+ });
+ }
+}
+
bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
llvm::raw_ostream &stderrOS, bool exitEarly,
bool disableOutput) {
@@ -1593,7 +1642,20 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
for (const Arg *arg : args.filtered(OPT_mllvm))
parseClangOption(arg->getValue(), arg->getSpelling());
- compileBitcodeFiles();
+ createSyntheticSections();
+ createSyntheticSymbols();
+
+ createAliases();
+ // If we are in "explicit exports" mode, hide everything that isn't
+ // explicitly exported. Do this before running LTO so that LTO can better
+ // optimize.
+ handleExplicitExports();
+ // LTO may emit a non-hidden (extern) object file symbol even if the
+ // corresponding bitcode symbol is hidden. In particular, this happens for
+ // cross-module references to hidden symbols under ThinLTO. Thus, if we
+ // compiled any bitcode files, we must redo the symbol hiding.
+ if (compileBitcodeFiles())
+ handleExplicitExports();
replaceCommonSymbols();
StringRef orderFile = args.getLastArgValue(OPT_order_file);
@@ -1605,51 +1667,6 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
// FIXME: should terminate the link early based on errors encountered so
// far?
- createSyntheticSections();
- createSyntheticSymbols();
-
- for (const auto &pair : config->aliasedSymbols) {
- if (const auto &sym = symtab->find(pair.first)) {
- if (const auto &defined = dyn_cast<Defined>(sym)) {
- symtab->aliasDefined(defined, pair.second);
- continue;
- }
- }
-
- warn("undefined base symbol '" + pair.first + "' for alias '" +
- pair.second + "'\n");
- }
-
- if (config->hasExplicitExports) {
- parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
- if (auto *defined = dyn_cast<Defined>(sym)) {
- StringRef symbolName = defined->getName();
- if (config->exportedSymbols.match(symbolName)) {
- if (defined->privateExtern) {
- if (defined->weakDefCanBeHidden) {
- // weak_def_can_be_hidden symbols behave similarly to
- // private_extern symbols in most cases, except for when
- // it is explicitly exported.
- // The former can be exported but the latter cannot.
- defined->privateExtern = false;
- } else {
- warn("cannot export hidden symbol " + toString(*defined) +
- "\n>>> defined in " + toString(defined->getFile()));
- }
- }
- } else {
- defined->privateExtern = true;
- }
- }
- });
- } else if (!config->unexportedSymbols.empty()) {
- parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
- if (auto *defined = dyn_cast<Defined>(sym))
- if (config->unexportedSymbols.match(defined->getName()))
- defined->privateExtern = true;
- });
- }
-
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
StringRef segName = arg->getValue(0);
StringRef sectName = arg->getValue(1);
diff --git a/lld/test/MachO/lto-explicit-exports.ll b/lld/test/MachO/lto-explicit-exports.ll
new file mode 100644
index 0000000000000..8e368b3e4b64a
--- /dev/null
+++ b/lld/test/MachO/lto-explicit-exports.ll
@@ -0,0 +1,81 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+;; Check that `-exported_symbol` causes all non-exported symbols to be marked
+;; as hidden before LTO. We don't want to downgrade them to private extern only
+;; after LTO runs as that likely causes LTO to miss optimization opportunities.
+
+; RUN: llvm-as %t/foo.ll -o %t/foo.o
+; RUN: llvm-as %t/refs-foo.ll -o %t/refs-foo.o
+
+; RUN: %lld -lSystem -dylib %t/foo.o %t/refs-foo.o -o %t/test-fulllto \
+; RUN: -save-temps -exported_symbol _refs_foo -exported_symbol _same_module_caller
+
+; RUN: llvm-dis %t/test-fulllto.0.2.internalize.bc -o - | FileCheck %s --check-prefix=FULLLTO
+; RUN: llvm-objdump --macho --syms %t/test-fulllto | FileCheck %s --check-prefix=FULLLTO-SYMS
+
+; FULLLTO: define internal void @foo()
+; FULLLTO: define internal void @same_module_callee()
+; FULLLTO: define dso_local void @same_module_caller()
+; FULLLTO: define dso_local void @refs_foo()
+
+;; LTO is able to elide the hidden symbols, and they will be entirely absent
+;; from the final symbol table.
+
+; FULLLTO-SYMS: SYMBOL TABLE:
+; FULLLTO-SYMS: g F __TEXT,__text _same_module_caller
+; FULLLTO-SYMS: g F __TEXT,__text _refs_foo
+; FULLLTO-SYMS: *UND* dyld_stub_binder
+; FULLLTO-SYMS-EMPTY:
+
+;; ThinLTO is unable to internalize symbols that are referenced from another
+;; module. Verify that we still mark the final symbol as private extern.
+
+; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o
+; RUN: opt -module-summary %t/refs-foo.ll -o %t/refs-foo.thinlto.o
+
+; RUN: %lld -lSystem -dylib %t/foo.thinlto.o %t/refs-foo.thinlto.o -o %t/test-thinlto \
+; RUN: -save-temps -exported_symbol _refs_foo -exported_symbol _same_module_caller
+
+; RUN: llvm-dis %t/foo.thinlto.o.2.internalize.bc -o - | FileCheck %s --check-prefix=THINLTO-FOO
+; RUN: llvm-dis %t/refs-foo.thinlto.o.2.internalize.bc -o - | FileCheck %s --check-prefix=THINLTO-REFS-FOO
+; RUN: llvm-objdump --macho --syms %t/test-thinlto | FileCheck %s --check-prefix=THINLTO-SYMS
+
+; THINLTO-FOO: define dso_local void @foo()
+; THINLTO-FOO: define internal void @same_module_callee()
+; THINLTO-REFS-FOO: declare dso_local void @foo()
+; THINLTO-REFS-FOO: define dso_local void @refs_foo()
+
+; THINLTO-SYMS: l F __TEXT,__text _foo
+; THINLTO-SYMS: g F __TEXT,__text _same_module_caller
+; THINLTO-SYMS: g F __TEXT,__text _refs_foo
+
+;--- foo.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() {
+ ret void
+}
+
+define void @same_module_callee() {
+ ret void
+}
+
+define void @same_module_caller() {
+ call void @same_module_callee()
+ ret void
+}
+
+;--- refs-foo.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @foo()
+
+define void @refs_foo() {
+ call void @foo()
+ ret void
+}
More information about the llvm-commits
mailing list