[lld] 31760e8 - [lld-macho] `-exported_symbols` should hide symbols before LTO runs

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 28 14:55:56 PDT 2022


Author: Jez Ng
Date: 2022-07-28T17:55:49-04:00
New Revision: 31760e8189c969d2687c7fa8d67771ab185dd2fb

URL: https://github.com/llvm/llvm-project/commit/31760e8189c969d2687c7fa8d67771ab185dd2fb
DIFF: https://github.com/llvm/llvm-project/commit/31760e8189c969d2687c7fa8d67771ab185dd2fb.diff

LOG: [lld-macho] `-exported_symbols` should hide symbols before LTO runs

We were previously doing it after LTO, which did have the desired effect
of having the un-exported symbols marked as private extern in the final
output binary, but doing it before LTO creates more optimization
opportunities.

One observable difference is that LTO can now elide un-exported symbols
entirely, so they may not even be present as private externs in the
output.

This is also what ld64 implements.

Reviewed By: #lld-macho, thevinster

Differential Revision: https://reviews.llvm.org/D130429

Added: 
    lld/test/MachO/lto-explicit-exports.ll

Modified: 
    lld/MachO/Driver.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ce2d55bef456f..bdcd9d30a77e5 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -526,7 +526,7 @@ static void initLLVM() {
   InitializeAllAsmParsers();
 }
 
-static void compileBitcodeFiles() {
+static bool compileBitcodeFiles() {
   TimeTraceScope timeScope("LTO");
   auto *lto = make<BitcodeCompiler>();
   for (InputFile *file : inputFiles)
@@ -534,8 +534,11 @@ static void compileBitcodeFiles() {
       if (!file->lazy)
         lto->add(*bitcodeFile);
 
-  for (ObjFile *file : lto->compile())
+  std::vector<ObjFile *> compiled = lto->compile();
+  for (ObjFile *file : compiled)
     inputFiles.insert(file);
+
+  return !compiled.empty();
 }
 
 // Replaces common symbols with defined symbols residing in __common sections.
@@ -1145,6 +1148,52 @@ static void referenceStubBinder() {
   symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, /*isWeak=*/false);
 }
 
+static void createAliases() {
+  for (const auto &pair : config->aliasedSymbols) {
+    if (const auto &sym = symtab->find(pair.first)) {
+      if (const auto &defined = dyn_cast<Defined>(sym)) {
+        symtab->aliasDefined(defined, pair.second);
+        continue;
+      }
+    }
+
+    warn("undefined base symbol '" + pair.first + "' for alias '" +
+         pair.second + "'\n");
+  }
+}
+
+static void handleExplicitExports() {
+  if (config->hasExplicitExports) {
+    parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
+      if (auto *defined = dyn_cast<Defined>(sym)) {
+        StringRef symbolName = defined->getName();
+        if (config->exportedSymbols.match(symbolName)) {
+          if (defined->privateExtern) {
+            if (defined->weakDefCanBeHidden) {
+              // weak_def_can_be_hidden symbols behave similarly to
+              // private_extern symbols in most cases, except for when
+              // it is explicitly exported.
+              // The former can be exported but the latter cannot.
+              defined->privateExtern = false;
+            } else {
+              warn("cannot export hidden symbol " + toString(*defined) +
+                   "\n>>> defined in " + toString(defined->getFile()));
+            }
+          }
+        } else {
+          defined->privateExtern = true;
+        }
+      }
+    });
+  } else if (!config->unexportedSymbols.empty()) {
+    parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
+      if (auto *defined = dyn_cast<Defined>(sym))
+        if (config->unexportedSymbols.match(defined->getName()))
+          defined->privateExtern = true;
+    });
+  }
+}
+
 bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
                  llvm::raw_ostream &stderrOS, bool exitEarly,
                  bool disableOutput) {
@@ -1593,7 +1642,20 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     for (const Arg *arg : args.filtered(OPT_mllvm))
       parseClangOption(arg->getValue(), arg->getSpelling());
 
-    compileBitcodeFiles();
+    createSyntheticSections();
+    createSyntheticSymbols();
+
+    createAliases();
+    // If we are in "explicit exports" mode, hide everything that isn't
+    // explicitly exported. Do this before running LTO so that LTO can better
+    // optimize.
+    handleExplicitExports();
+    // LTO may emit a non-hidden (extern) object file symbol even if the
+    // corresponding bitcode symbol is hidden. In particular, this happens for
+    // cross-module references to hidden symbols under ThinLTO. Thus, if we
+    // compiled any bitcode files, we must redo the symbol hiding.
+    if (compileBitcodeFiles())
+      handleExplicitExports();
     replaceCommonSymbols();
 
     StringRef orderFile = args.getLastArgValue(OPT_order_file);
@@ -1605,51 +1667,6 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     // FIXME: should terminate the link early based on errors encountered so
     // far?
 
-    createSyntheticSections();
-    createSyntheticSymbols();
-
-    for (const auto &pair : config->aliasedSymbols) {
-      if (const auto &sym = symtab->find(pair.first)) {
-        if (const auto &defined = dyn_cast<Defined>(sym)) {
-          symtab->aliasDefined(defined, pair.second);
-          continue;
-        }
-      }
-
-      warn("undefined base symbol '" + pair.first + "' for alias '" +
-           pair.second + "'\n");
-    }
-
-    if (config->hasExplicitExports) {
-      parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
-        if (auto *defined = dyn_cast<Defined>(sym)) {
-          StringRef symbolName = defined->getName();
-          if (config->exportedSymbols.match(symbolName)) {
-            if (defined->privateExtern) {
-              if (defined->weakDefCanBeHidden) {
-                // weak_def_can_be_hidden symbols behave similarly to
-                // private_extern symbols in most cases, except for when
-                // it is explicitly exported.
-                // The former can be exported but the latter cannot.
-                defined->privateExtern = false;
-              } else {
-                warn("cannot export hidden symbol " + toString(*defined) +
-                     "\n>>> defined in " + toString(defined->getFile()));
-              }
-            }
-          } else {
-            defined->privateExtern = true;
-          }
-        }
-      });
-    } else if (!config->unexportedSymbols.empty()) {
-      parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
-        if (auto *defined = dyn_cast<Defined>(sym))
-          if (config->unexportedSymbols.match(defined->getName()))
-            defined->privateExtern = true;
-      });
-    }
-
     for (const Arg *arg : args.filtered(OPT_sectcreate)) {
       StringRef segName = arg->getValue(0);
       StringRef sectName = arg->getValue(1);

diff  --git a/lld/test/MachO/lto-explicit-exports.ll b/lld/test/MachO/lto-explicit-exports.ll
new file mode 100644
index 0000000000000..8e368b3e4b64a
--- /dev/null
+++ b/lld/test/MachO/lto-explicit-exports.ll
@@ -0,0 +1,81 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+;; Check that `-exported_symbol` causes all non-exported symbols to be marked
+;; as hidden before LTO. We don't want to downgrade them to private extern only
+;; after LTO runs as that likely causes LTO to miss optimization opportunities.
+
+; RUN: llvm-as %t/foo.ll -o %t/foo.o
+; RUN: llvm-as %t/refs-foo.ll -o %t/refs-foo.o
+
+; RUN: %lld -lSystem -dylib %t/foo.o %t/refs-foo.o -o %t/test-fulllto \
+; RUN:  -save-temps -exported_symbol _refs_foo -exported_symbol _same_module_caller
+
+; RUN: llvm-dis %t/test-fulllto.0.2.internalize.bc -o - | FileCheck %s --check-prefix=FULLLTO
+; RUN: llvm-objdump --macho --syms %t/test-fulllto | FileCheck %s --check-prefix=FULLLTO-SYMS
+
+; FULLLTO: define internal void @foo()
+; FULLLTO: define internal void @same_module_callee()
+; FULLLTO: define dso_local void @same_module_caller()
+; FULLLTO: define dso_local void @refs_foo()
+
+;; LTO is able to elide the hidden symbols, and they will be entirely absent
+;; from the final symbol table.
+
+; FULLLTO-SYMS:       SYMBOL TABLE:
+; FULLLTO-SYMS:       g     F __TEXT,__text _same_module_caller
+; FULLLTO-SYMS:       g     F __TEXT,__text _refs_foo
+; FULLLTO-SYMS:       *UND* dyld_stub_binder
+; FULLLTO-SYMS-EMPTY:
+
+;; ThinLTO is unable to internalize symbols that are referenced from another
+;; module. Verify that we still mark the final symbol as private extern.
+
+; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o
+; RUN: opt -module-summary %t/refs-foo.ll -o %t/refs-foo.thinlto.o
+
+; RUN: %lld -lSystem -dylib %t/foo.thinlto.o %t/refs-foo.thinlto.o -o %t/test-thinlto \
+; RUN:  -save-temps -exported_symbol _refs_foo -exported_symbol _same_module_caller
+
+; RUN: llvm-dis %t/foo.thinlto.o.2.internalize.bc -o - | FileCheck %s --check-prefix=THINLTO-FOO
+; RUN: llvm-dis %t/refs-foo.thinlto.o.2.internalize.bc -o - | FileCheck %s --check-prefix=THINLTO-REFS-FOO
+; RUN: llvm-objdump --macho --syms %t/test-thinlto | FileCheck %s --check-prefix=THINLTO-SYMS
+
+; THINLTO-FOO: define dso_local void @foo()
+; THINLTO-FOO: define internal void @same_module_callee()
+; THINLTO-REFS-FOO: declare dso_local void @foo()
+; THINLTO-REFS-FOO: define dso_local void @refs_foo()
+
+; THINLTO-SYMS: l     F __TEXT,__text _foo
+; THINLTO-SYMS: g     F __TEXT,__text _same_module_caller
+; THINLTO-SYMS: g     F __TEXT,__text _refs_foo
+
+;--- foo.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() {
+  ret void
+}
+
+define void @same_module_callee() {
+  ret void
+}
+
+define void @same_module_caller() {
+  call void @same_module_callee()
+  ret void
+}
+
+;--- refs-foo.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @foo()
+
+define void @refs_foo() {
+  call void @foo()
+  ret void
+}


        


More information about the llvm-commits mailing list