[lld] 58edaef - [lld-macho] Do not error out on dead stripped duplicate symbols

Vincent Lee via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 30 15:10:12 PDT 2022


Author: Vincent Lee
Date: 2022-09-30T15:09:27-07:00
New Revision: 58edaef3fe08b81d9e3c172dd38338508be7ac79

URL: https://github.com/llvm/llvm-project/commit/58edaef3fe08b81d9e3c172dd38338508be7ac79
DIFF: https://github.com/llvm/llvm-project/commit/58edaef3fe08b81d9e3c172dd38338508be7ac79.diff

LOG: [lld-macho] Do not error out on dead stripped duplicate symbols

Builds that error out on duplicate symbols can still succeed if the symbols
will be dead stripped. Currently, this is the current behavior in ld64.
https://github.com/apple-oss-distributions/ld64/blob/main/src/ld/Resolver.cpp#L2018.
In order to provide an easier to path for adoption, introduce a new flag that will
retain compatibility with ld64's behavior (similar to `--deduplicate-literals`). This is
turned off by default since we do not encourage this behavior in the linker.

Reviewed By: #lld-macho, thakis, int3

Differential Revision: https://reviews.llvm.org/D134794

Added: 
    lld/test/MachO/abs-duplicate.s

Modified: 
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/Options.td
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Writer.cpp
    lld/docs/MachO/ld64-vs-lld.rst
    lld/test/MachO/dead-strip.s

Removed: 
    lld/test/MachO/invalid/abs-duplicate.s


################################################################################
diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 620206f307d6f..fa79b87268cd5 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -135,6 +135,7 @@ struct Configuration {
   bool timeTraceEnabled = false;
   bool dataConst = false;
   bool dedupLiterals = true;
+  bool deadStripDuplicates = false;
   bool omitDebugInfo = false;
   bool warnDylibInstallName = false;
   bool ignoreOptimizationHints = false;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f13649a911d92..9e30239fa9459 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1480,6 +1480,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->dedupLiterals =
       args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) ||
       config->icfLevel != ICFLevel::none;
+  config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
   config->warnDylibInstallName = args.hasFlag(
       OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
   config->ignoreOptimizationHints = args.hasArg(OPT_ignore_optimization_hints);

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index f7e0414180a32..8a08d036730af 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -57,6 +57,9 @@ def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">,
 def deduplicate_literals: Flag<["--"], "deduplicate-literals">,
     HelpText<"Enable literal deduplication. This is implied by --icf={safe,all}">,
     Group<grp_lld>;
+def dead_strip_duplicates: Flag<["--"], "dead-strip-duplicates">,
+    HelpText<"Do not error on duplicate symbols that will be dead stripped.">,
+    Group<grp_lld>;
 def print_dylib_search: Flag<["--"], "print-dylib-search">,
     HelpText<"Print which paths lld searched when trying to find dylibs">,
     Group<grp_lld>;

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 6b0a8f4442b3e..301692ff583fa 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -45,6 +45,21 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
   return {sym, p.second};
 }
 
+namespace {
+struct DuplicateSymbolDiag {
+  // Pair containing source location and source file
+  const std::pair<std::string, std::string> src1;
+  const std::pair<std::string, std::string> src2;
+  const Symbol *sym;
+
+  DuplicateSymbolDiag(const std::pair<std::string, std::string> src1,
+                      const std::pair<std::string, std::string> src2,
+                      const Symbol *sym)
+      : src1(src1), src2(src2), sym(sym) {}
+};
+SmallVector<DuplicateSymbolDiag> dupSymDiags;
+} // namespace
+
 Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
                                  InputSection *isec, uint64_t value,
                                  uint64_t size, bool isWeakDef,
@@ -80,17 +95,13 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
           concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined));
         }
       } else {
-        std::string src1 = defined->getSourceLocation();
-        std::string src2 = isec ? isec->getSourceLocation(value) : "";
-
-        std::string message =
-            "duplicate symbol: " + toString(*defined) + "\n>>> defined in ";
-        if (!src1.empty())
-          message += src1 + "\n>>>            ";
-        message += toString(defined->getFile()) + "\n>>> defined in ";
-        if (!src2.empty())
-          message += src2 + "\n>>>            ";
-        error(message + toString(file));
+        std::string srcLoc1 = defined->getSourceLocation();
+        std::string srcLoc2 = isec ? isec->getSourceLocation(value) : "";
+        std::string srcFile1 = toString(defined->getFile());
+        std::string srcFile2 = toString(file);
+
+        dupSymDiags.push_back({make_pair(srcLoc1, srcFile1),
+                               make_pair(srcLoc2, srcFile2), defined});
       }
 
     } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
@@ -366,6 +377,21 @@ struct UndefinedDiag {
 MapVector<const Undefined *, UndefinedDiag> undefs;
 }
 
+void macho::reportPendingDuplicateSymbols() {
+  for (const auto &duplicate : dupSymDiags) {
+    if (!config->deadStripDuplicates || duplicate.sym->isLive()) {
+      std::string message =
+          "duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in ";
+      if (!duplicate.src1.first.empty())
+        message += duplicate.src1.first + "\n>>>            ";
+      message += duplicate.src1.second + "\n>>> defined in ";
+      if (!duplicate.src2.first.empty())
+        message += duplicate.src2.first + "\n>>>            ";
+      error(message + duplicate.src2.second);
+    }
+  }
+}
+
 void macho::reportPendingUndefinedSymbols() {
   for (const auto &undef : undefs) {
     const UndefinedDiag &locations = undef.second;

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index d90245c7aaa0e..ea02367edc269 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -72,6 +72,7 @@ class SymbolTable {
 };
 
 void reportPendingUndefinedSymbols();
+void reportPendingDuplicateSymbols();
 
 // Call reportPendingUndefinedSymbols() to emit diagnostics.
 void treatUndefinedSymbol(const Undefined &, StringRef source);

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index b9e6775843b51..cffc89b2ea3d7 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1185,8 +1185,9 @@ template <class LP> void Writer::run() {
   if (in.initOffsets->isNeeded())
     in.initOffsets->setUp();
 
-  // Do not proceed if there was an undefined symbol.
+  // Do not proceed if there were undefined or duplicate symbols.
   reportPendingUndefinedSymbols();
+  reportPendingDuplicateSymbols();
   if (errorCount())
     return;
 

diff  --git a/lld/docs/MachO/ld64-vs-lld.rst b/lld/docs/MachO/ld64-vs-lld.rst
index f571e6a62b36f..6ee48715b2ecb 100644
--- a/lld/docs/MachO/ld64-vs-lld.rst
+++ b/lld/docs/MachO/ld64-vs-lld.rst
@@ -14,6 +14,15 @@ some programs which have (incorrectly) relied on string deduplication always
 occurring. In particular, programs which compare string literals via pointer
 equality must be fixed to use value equality instead.
 
+Dead Stripping Duplicate Symbols
+********************************
+ld64 strips dead code before reporting duplicate symbols. By default, LLD does
+the opposite. ld64's behavior hides ODR violations, so we have chosen not
+to follow it. But, to make adoption easy, LLD can mimic this behavior via
+the ``--dead-strip-duplicates`` flag. Usage of this flag is discouraged, and
+this behavior should be fixed in the source. However, for sources that are not
+within the user's control, this will mitigate users for adoption.
+
 ``-no_deduplicate`` Flag
 ************************
 - ld64: This turns off ICF (deduplication pass) in the linker.

diff  --git a/lld/test/MachO/abs-duplicate.s b/lld/test/MachO/abs-duplicate.s
new file mode 100644
index 0000000000000..a02324c2e5063
--- /dev/null
+++ b/lld/test/MachO/abs-duplicate.s
@@ -0,0 +1,41 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
+# RUN: not %lld -lSystem %t/test.o %t/weakfoo.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK:      error: duplicate symbol: _weakfoo
+# CHECK-NEXT: >>> defined in {{.*}}/test.o
+# CHECK-NEXT: >>> defined in {{.*}}/weakfoo.o
+
+## Duplicate absolute symbols that will be dead stripped later should not fail.
+# RUN: %lld -lSystem -dead_strip --dead-strip-duplicates -map %t/stripped-duplicate-map \
+# RUN:     %t/test.o %t/weakfoo.o -o %t/test
+# RUN: llvm-objdump --syms %t/test | FileCheck %s --check-prefix=DUP
+# DUP-LABEL: SYMBOL TABLE:
+# DUP-NEXT:   g F __TEXT,__text _main
+# DUP-NEXT:   g F __TEXT,__text __mh_execute_header
+# DUP-NEXT:   *UND* dyld_stub_binder
+
+## Dead stripped non-section symbols don't show up in map files because there's no input section.
+## Check that _weakfoo doesn't show up. This matches ld64.
+# RUN: FileCheck --check-prefix=DUPMAP %s < %t/stripped-duplicate-map
+# DUPMAP: _main
+# DUPMAP-LABEL: Dead Stripped Symbols
+# DUPMAP-NOT: _weakfoo
+
+#--- weakfoo.s
+.globl _weakfoo
+## The weak attribute is ignored for absolute symbols, so we will have a
+## duplicate symbol error for _weakfoo.
+.weak_definition _weakfoo
+_weakfoo = 0x1234
+
+#--- test.s
+.globl _main, _weakfoo
+.weak_definition _weakfoo
+_weakfoo = 0x5678
+
+.text
+_main:
+  ret

diff  --git a/lld/test/MachO/dead-strip.s b/lld/test/MachO/dead-strip.s
index 00cc5ee7bba51..20cfd95f966eb 100644
--- a/lld/test/MachO/dead-strip.s
+++ b/lld/test/MachO/dead-strip.s
@@ -321,7 +321,6 @@
 # RUN: %lld -dylib -dead_strip --deduplicate-literals %t/literals.o -o %t/literals
 # RUN: llvm-objdump --macho --section="__TEXT,__cstring" --section="__DATA,str_ptrs" \
 # RUN:   --section="__TEXT,__literals" %t/literals | FileCheck %s --check-prefix=LIT
-
 # LIT:      Contents of (__TEXT,__cstring) section
 # LIT-NEXT: foobar
 # LIT-NEXT: Contents of (__DATA,str_ptrs) section
@@ -330,6 +329,45 @@
 # LIT-NEXT: Contents of (__TEXT,__literals) section
 # LIT-NEXT: ef be ad de {{$}}
 
+## Duplicate symbols that will be dead stripped later should not fail when using
+## the --dead-stripped-duplicates flag
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN:     %t/duplicate1.s -o %t/duplicate1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN:     %t/duplicate2.s -o %t/duplicate2.o
+# RUN: %lld -lSystem -dead_strip --dead-strip-duplicates -map %t/stripped-duplicate-map \
+# RUN:     %t/duplicate1.o %t/duplicate2.o -o %t/duplicate
+# RUN: llvm-objdump --syms %t/duplicate | FileCheck %s --check-prefix=DUP
+# DUP-LABEL: SYMBOL TABLE:
+# DUP-NEXT:   g F __TEXT,__text _main
+# DUP-NEXT:   g F __TEXT,__text __mh_execute_header
+# DUP-NEXT:   *UND* dyld_stub_binder
+
+## Check that the duplicate dead stripped symbols get listed properly.
+# RUN: FileCheck --check-prefix=DUPMAP %s < %t/stripped-duplicate-map
+# DUPMAP: _main
+# DUPMAP-LABEL: Dead Stripped Symbols
+# DUPMAP: <<dead>> [ 2] _foo
+
+#--- duplicate1.s
+.text
+.globl _main, _foo
+_foo:
+  retq
+
+_main:
+  retq
+
+.subsections_via_symbols
+
+#--- duplicate2.s
+.text
+.globl _foo
+_foo:
+  retq
+
+.subsections_via_symbols
+
 #--- basics.s
 .comm _ref_com, 1
 .comm _unref_com, 1

diff  --git a/lld/test/MachO/invalid/abs-duplicate.s b/lld/test/MachO/invalid/abs-duplicate.s
deleted file mode 100644
index ff1d75e5b35b1..0000000000000
--- a/lld/test/MachO/invalid/abs-duplicate.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
-# RUN: not %lld -lSystem %t/test.o %t/weakfoo.o -o %t/test 2>&1 | FileCheck %s
-
-# CHECK:      error: duplicate symbol: _weakfoo
-# CHECK-NEXT: >>> defined in {{.*}}/test.o
-# CHECK-NEXT: >>> defined in {{.*}}/weakfoo.o
-
-#--- weakfoo.s
-.globl _weakfoo
-## The weak attribute is ignored for absolute symbols, so we will have a
-## duplicate symbol error for _weakfoo.
-.weak_definition _weakfoo
-_weakfoo = 0x1234
-
-#--- test.s
-.globl _main, _weakfoo
-.weak_definition _weakfoo
-_weakfoo = 0x5678
-
-.text
-_main:
-  ret


        


More information about the llvm-commits mailing list