[lld] eb5b7d4 - [lld-macho] LTO: Unset VisibleToRegularObj where possible

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 15 18:16:45 PDT 2021


Author: Jez Ng
Date: 2021-04-15T21:16:33-04:00
New Revision: eb5b7d4497e323cf6214eb3e008dc37bc5ed1fd7

URL: https://github.com/llvm/llvm-project/commit/eb5b7d4497e323cf6214eb3e008dc37bc5ed1fd7
DIFF: https://github.com/llvm/llvm-project/commit/eb5b7d4497e323cf6214eb3e008dc37bc5ed1fd7.diff

LOG: [lld-macho] LTO: Unset VisibleToRegularObj where possible

This allows LLVM's LTO to internalize symbols that are not referenced
directly by regular objects. Naturally, this means we need to track
which symbols are referenced by regular objects. The approach taken here
is similar to LLD-COFF's: like the COFF port, we extend
`SymbolTable::insert()` to set the isVisibleToRegularObj bit. (LLD-ELF
relies on the Symbol constructor and `Symbol::mergeProperties()`, but
the Mach-O port does not have a `mergeProperties()` equivalent.)

>From what I can tell, ld64 (which uses libLTO) doesn't do this
optimization at all. I'm not even sure libLTO provides a way to do this.
Not having ld64's behavior as a reference implementation is unfortunate;
instead, I am relying on LLD-ELF/COFF's behavior as references while
erring on the conservative side. In particular, LLD-MachO will only do
this optimization for executables right now.

We also don't attempt it when `-flat_namespace` is used -- otherwise
we'd need scan the symbol table to find matches for every un-namespaced
symbol reference, which is expensive.

internalize.ll is based off the LLD-ELF tests `internalize-basic.ll` and
`internalize-undef.ll`. Looks like @davide added some of LLD-ELF's internalize
tests, so adding him as a reviewer...

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D99105

Added: 
    lld/test/MachO/internalize.ll

Modified: 
    lld/MachO/LTO.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.h
    lld/test/MachO/lto-save-temps.ll

Removed: 
    


################################################################################
diff  --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index 70c555f102bb9..f04b950553f81 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -25,6 +25,7 @@
 using namespace lld;
 using namespace lld::macho;
 using namespace llvm;
+using namespace llvm::MachO;
 using namespace llvm::sys;
 
 static lto::Config createConfig() {
@@ -39,6 +40,9 @@ static lto::Config createConfig() {
   };
   c.TimeTraceEnabled = config->timeTraceEnabled;
   c.TimeTraceGranularity = config->timeTraceGranularity;
+  if (config->saveTemps)
+    checkError(c.addSaveTemps(config->outputFile.str() + ".",
+                              /*UseInputModulePath=*/true));
   return c;
 }
 
@@ -67,6 +71,12 @@ void BitcodeCompiler::add(BitcodeFile &f) {
     // be removed.
     r.Prevailing = !objSym.isUndefined() && sym->getFile() == &f;
 
+    // FIXME: What about other output types? And we can probably be less
+    // restrictive with -flat_namespace, but it's an infrequent use case.
+    r.VisibleToRegularObj = config->outputType != MH_EXECUTE ||
+                            config->namespaceKind == NamespaceKind::flat ||
+                            sym->isUsedInRegularObj;
+
     // Un-define the symbol so that we don't get duplicate symbol errors when we
     // load the ObjFile emitted by LTO compilation.
     if (r.Prevailing)
@@ -74,7 +84,6 @@ void BitcodeCompiler::add(BitcodeFile &f) {
                                RefState::Strong);
 
     // TODO: set the other resolution configs properly
-    r.VisibleToRegularObj = true;
   }
   checkError(ltoObj->add(std::move(f.obj), resols));
 }

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 76d4289e627bb..1fd7704d87a8f 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -24,17 +24,22 @@ Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
   return symVector[it->second];
 }
 
-std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
+std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
+                                              const InputFile *file) {
   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
 
-  // Name already present in the symbol table.
-  if (!p.second)
-    return {symVector[p.first->second], false};
+  Symbol *sym;
+  if (!p.second) {
+    // Name already present in the symbol table.
+    sym = symVector[p.first->second];
+  } else {
+    // Name is a new symbol.
+    sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+    symVector.push_back(sym);
+  }
 
-  // Name is a new symbol.
-  Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
-  symVector.push_back(sym);
-  return {sym, true};
+  sym->isUsedInRegularObj |= !file || isa<ObjFile>(file);
+  return {sym, p.second};
 }
 
 Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
@@ -44,7 +49,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
   Symbol *s;
   bool wasInserted;
   bool overridesWeakDef = false;
-  std::tie(s, wasInserted) = insert(name);
+  std::tie(s, wasInserted) = insert(name, file);
 
   if (!wasInserted) {
     if (auto *defined = dyn_cast<Defined>(s)) {
@@ -77,7 +82,7 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
                                   bool isWeakRef) {
   Symbol *s;
   bool wasInserted;
-  std::tie(s, wasInserted) = insert(name);
+  std::tie(s, wasInserted) = insert(name, file);
 
   RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
 
@@ -96,7 +101,7 @@ Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
                                uint32_t align, bool isPrivateExtern) {
   Symbol *s;
   bool wasInserted;
-  std::tie(s, wasInserted) = insert(name);
+  std::tie(s, wasInserted) = insert(name, file);
 
   if (!wasInserted) {
     if (auto *common = dyn_cast<CommonSymbol>(s)) {
@@ -117,7 +122,7 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
                               bool isTlv) {
   Symbol *s;
   bool wasInserted;
-  std::tie(s, wasInserted) = insert(name);
+  std::tie(s, wasInserted) = insert(name, file);
 
   RefState refState = RefState::Unreferenced;
   if (!wasInserted) {
@@ -149,7 +154,7 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
                              const object::Archive::Symbol &sym) {
   Symbol *s;
   bool wasInserted;
-  std::tie(s, wasInserted) = insert(name);
+  std::tie(s, wasInserted) = insert(name, file);
 
   if (wasInserted)
     replaceSymbol<LazySymbol>(s, file, sym);

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 29aa03ec884b2..10d17f6a52b2e 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -60,7 +60,7 @@ class SymbolTable {
   Symbol *find(StringRef name) { return find(llvm::CachedHashStringRef(name)); }
 
 private:
-  std::pair<Symbol *, bool> insert(StringRef name);
+  std::pair<Symbol *, bool> insert(StringRef name, const InputFile *);
   llvm::DenseMap<llvm::CachedHashStringRef, int> symMap;
   std::vector<Symbol *> symVector;
 };

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 054834206d6f1..962aad317c6dc 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -85,12 +85,17 @@ class Symbol {
 
 protected:
   Symbol(Kind k, StringRefZ name, InputFile *file)
-      : symbolKind(k), nameData(name.data), nameSize(name.size), file(file) {}
+      : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
+        isUsedInRegularObj(!file || isa<ObjFile>(file)) {}
 
   Kind symbolKind;
   const char *nameData;
   mutable uint32_t nameSize;
   InputFile *file;
+
+public:
+  // True if this symbol was referenced by a regular (non-bitcode) object.
+  bool isUsedInRegularObj;
 };
 
 class Defined : public Symbol {
@@ -249,7 +254,10 @@ T *replaceSymbol(Symbol *s, ArgT &&...arg) {
   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
          "Not a Symbol");
 
-  return new (s) T(std::forward<ArgT>(arg)...);
+  bool isUsedInRegularObj = s->isUsedInRegularObj;
+  T *sym = new (s) T(std::forward<ArgT>(arg)...);
+  sym->isUsedInRegularObj |= isUsedInRegularObj;
+  return sym;
 }
 
 } // namespace macho

diff  --git a/lld/test/MachO/internalize.ll b/lld/test/MachO/internalize.ll
new file mode 100644
index 0000000000000..74c39be3efad9
--- /dev/null
+++ b/lld/test/MachO/internalize.ll
@@ -0,0 +1,72 @@
+; REQUIRES: x86
+
+;; Check that we internalize bitcode symbols (only) where possible, i.e. when
+;; they are not referenced by undefined symbols originating from non-bitcode
+;; files.
+
+; RUN: rm -rf %t; split-file %s %t
+; RUN: llvm-as %t/test.s -o %t/test.o
+; RUN: llvm-as %t/baz.s -o %t/baz.o
+; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/regular.s -o %t/regular.o
+; RUN: %lld -pie -lSystem %t/test.o %t/baz.o %t/regular.o -o %t/test -save-temps
+; RUN: llvm-dis < %t/test.0.2.internalize.bc | FileCheck %s
+; RUN: llvm-objdump --macho --syms %t/test | FileCheck %s --check-prefix=SYMTAB
+
+;; Check that main is not internalized. This covers the case of bitcode symbols
+;; referenced by undefined symbols that don't belong to any InputFile.
+; CHECK: define void @main()
+
+;; Check that the foo and bar functions are correctly internalized.
+; CHECK: define internal void @bar()
+; CHECK: define internal void @foo()
+
+;; Check that a bitcode symbol that is referenced by a regular object file isn't
+;; internalized.
+; CHECK: define void @used_in_regular_obj()
+
+;; Check that a bitcode symbol that is defined in another bitcode file gets
+;; internalized.
+; CHECK: define internal void @baz()
+
+; Check foo and bar are not emitted to the .symtab
+; SYMTAB-LABEL: SYMBOL TABLE:
+; SYMTAB-NEXT:  g     F __TEXT,__text _main
+; SYMTAB-NEXT:  g     F __TEXT,__text _used_in_regular_obj
+; SYMTAB-NEXT:  g     F __TEXT,__text __mh_execute_header
+; SYMTAB-EMPTY:
+
+;--- test.s
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @baz()
+
+define void @main() {
+  call void @bar()
+  call void @baz()
+  ret void
+}
+
+define void @bar() {
+  ret void
+}
+
+define hidden void @foo() {
+  ret void
+}
+
+define void @used_in_regular_obj() {
+  ret void
+}
+
+;--- baz.s
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @baz() {
+  ret void
+}
+
+;--- regular.s
+.data
+.quad _used_in_regular_obj

diff  --git a/lld/test/MachO/lto-save-temps.ll b/lld/test/MachO/lto-save-temps.ll
index 63c196b4a73f7..d1b2cad33bc28 100644
--- a/lld/test/MachO/lto-save-temps.ll
+++ b/lld/test/MachO/lto-save-temps.ll
@@ -5,15 +5,15 @@
 
 ; RUN: rm -rf %t; split-file %s %t
 ; RUN: llvm-as %t/foo.ll -o %t/foo.o
-; RUN: llvm-as %t/test.ll -o %t/test.o
-; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
+; RUN: llvm-as %t/bar.ll -o %t/bar.o
+; RUN: %lld -dylib -save-temps %t/foo.o %t/bar.o -o %t/test
 ; RUN: llvm-objdump -d --no-show-raw-insn %t/test.lto.o | FileCheck %s --check-prefix=ALL
 ; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
 
 ; RUN: rm -rf %t; split-file %s %t
 ; RUN: opt -module-summary %t/foo.ll -o %t/foo.o
-; RUN: opt -module-summary %t/test.ll -o %t/test.o
-; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
+; RUN: opt -module-summary %t/bar.ll -o %t/bar.o
+; RUN: %lld -dylib -save-temps %t/foo.o %t/bar.o -o %t/test
 ; RUN: llvm-objdump -d --no-show-raw-insn %t/test1.lto.o | FileCheck %s --check-prefix=FOO
 ; RUN: llvm-objdump -d --no-show-raw-insn %t/test2.lto.o | FileCheck %s --check-prefix=MAIN
 ; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
@@ -21,12 +21,12 @@
 ; FOO:      <_foo>:
 ; FOO-NEXT: retq
 
-; MAIN:      <_main>:
+; MAIN:      <_bar>:
 ; MAIN-NEXT: retq
 
 ; ALL:      <_foo>:
 ; ALL-NEXT: retq
-; ALL:      <_main>:
+; ALL:      <_bar>:
 ; ALL-NEXT: retq
 
 ;--- foo.ll
@@ -38,11 +38,11 @@ define void @foo() {
   ret void
 }
 
-;--- test.ll
+;--- bar.ll
 
 target triple = "x86_64-apple-macosx10.15.0"
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @main() {
+define void @bar() {
   ret void
 }


        


More information about the llvm-commits mailing list