[lld] ba890da - [ELF] Demote lazy symbols relative to a discarded section to Undefined

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 9 11:27:44 PDT 2020


Author: Fangrui Song
Date: 2020-06-09T11:27:34-07:00
New Revision: ba890da2878299dc82b104c06f067e45162d880f

URL: https://github.com/llvm/llvm-project/commit/ba890da2878299dc82b104c06f067e45162d880f
DIFF: https://github.com/llvm/llvm-project/commit/ba890da2878299dc82b104c06f067e45162d880f.diff

LOG: [ELF] Demote lazy symbols relative to a discarded section to Undefined

Fixes PR45594.

In `ObjFile<ELFT>::initializeSymbols()`, for a defined symbol relative to
a discarded section (due to section group rules), it may have been
inserted as a lazy symbol. We need to demote it to an Undefined to
enable the `discarded section` error happened in a later pass.

Add `LazyObjFile::fetched` (if true) and `ArchiveFile::parsed` (if
false) to represent that there is an ongoing lazy symbol fetch and we
should replace the current lazy symbol with an Undefined, instead of
calling `Symbol::resolve` (`Symbol::resolve` should be called if the lazy
symbol was added by an unrelated archive/lazy object).

As a side result, one small issue in start-lib-comdat.s is now fixed.
The hack motivating D51892 will be unsupported: if
`.gnu.linkonce.t.__i686.get_pc_thunk.bx` in an archive is referenced
by another section, this will likely be errored unless the function is
also defined in a regular object file.
(Bringing back rL330869 would error `undefined symbol` instead of the
more relevant `discarded section`.)

Note, glibc i386's crti.o still works (PR31215), because
`.gnu.linkonce.t.__x86.get_pc_thunk.bx` is in crti.o (one of the first
regular object files in a linker command line).

Reviewed By: psmith

Differential Revision: https://reviews.llvm.org/D79300

Added: 
    lld/test/ELF/comdat-discarded-lazy.s

Modified: 
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/test/ELF/i386-linkonce.s
    lld/test/ELF/start-lib-comdat.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index c451aee1f921..5bbd6f0df7e9 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1117,8 +1117,20 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
     // COMDAT member sections, and if a comdat group is discarded, some
     // defined symbol in a .eh_frame becomes dangling symbols.
     if (sec == &InputSection::discarded) {
-      this->symbols[i]->resolve(
-          Undefined{this, name, binding, stOther, type, secIdx});
+      Undefined und{this, name, binding, stOther, type, secIdx};
+      Symbol *sym = this->symbols[i];
+      // !ArchiveFile::parsed or LazyObjFile::fetched means that the file
+      // containing this object has not finished processing, i.e. this symbol is
+      // a result of a lazy symbol fetch. We should demote the lazy symbol to an
+      // Undefined so that any relocations outside of the group to it will
+      // trigger a discarded section error.
+      if ((sym->symbolKind == Symbol::LazyArchiveKind &&
+           !cast<ArchiveFile>(sym->file)->parsed) ||
+          (sym->symbolKind == Symbol::LazyObjectKind &&
+           cast<LazyObjFile>(sym->file)->fetched))
+        sym->replace(und);
+      else
+        sym->resolve(und);
       continue;
     }
 
@@ -1141,6 +1153,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&file)
 void ArchiveFile::parse() {
   for (const Archive::Symbol &sym : file->symbols())
     symtab->addSymbol(LazyArchive{*this, sym});
+
+  // Inform a future invocation of ObjFile<ELFT>::initializeSymbols() that this
+  // archive has been processed.
+  parsed = true;
 }
 
 // Returns a buffer pointing to a member file containing a given symbol.
@@ -1615,14 +1631,13 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,
 }
 
 void LazyObjFile::fetch() {
-  if (mb.getBuffer().empty())
+  if (fetched)
     return;
+  fetched = true;
 
   InputFile *file = createObjectFile(mb, archiveName, offsetInArchive);
   file->groupId = groupId;
 
-  mb = {};
-
   // Copy symbol vector so that the new InputFile doesn't have to
   // insert the same defined symbols to the symbol table again.
   file->symbols = std::move(symbols);

diff  --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 51882e0c9647..7af85e417ca5 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -307,6 +307,8 @@ class LazyObjFile : public InputFile {
   template <class ELFT> void parse();
   void fetch();
 
+  bool fetched = false;
+
 private:
   uint64_t offsetInArchive;
 };
@@ -327,6 +329,8 @@ class ArchiveFile : public InputFile {
   size_t getMemberCount() const;
   size_t getFetchedMemberCount() const { return seen.size(); }
 
+  bool parsed = false;
+
 private:
   std::unique_ptr<Archive> file;
   llvm::DenseSet<uint64_t> seen;

diff  --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s
new file mode 100644
index 000000000000..8ee15158f6b3
--- /dev/null
+++ b/lld/test/ELF/comdat-discarded-lazy.s
@@ -0,0 +1,60 @@
+# REQUIRES: x86
+## Test that lazy symbols in a section group can be demoted to Undefined,
+## so that we can report a "discarded section" error.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo '.globl f1, foo; f1: call foo; \
+# RUN:   .section .text.foo,"axG", at progbits,foo,comdat; foo:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
+
+## Test the case when the symbol causing a "discarded section" is ordered
+## *before* the symbol fetching the lazy object.
+## The test relies on the symbol table order of llvm-mc (lexical), which will
+## need adjustment if llvm-mc changes its behavior.
+# RUN: echo '.globl f2, aa; f2: call aa; \
+# RUN:   .section .text.foo,"axG", at progbits,foo,comdat; aa:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %taa.o
+# RUN: llvm-nm -p %taa.o | FileCheck --check-prefix=AA-NM %s
+# RUN: not ld.lld %t.o --start-lib %t1.o %taa.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s
+# RUN: rm -f %taa.a && llvm-ar rc %taa.a %taa.o
+# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %taa.a -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s
+
+# AA-NM: aa
+# AA-NM: f2
+
+# AA:      error: relocation refers to a symbol in a discarded section: aa
+# AA-NEXT: >>> defined in {{.*}}aa.o
+# AA-NEXT: >>> section group signature: foo
+# AA-NEXT: >>> prevailing definition is in {{.*}}1.o
+# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1)
+
+## Test the case when the symbol causing a "discarded section" is ordered
+## *after* the symbol fetching the lazy object.
+# RUN: echo '.globl f2, zz; f2: call zz; \
+# RUN:   .section .text.foo,"axG", at progbits,foo,comdat; zz:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %tzz.o
+# RUN: llvm-nm -p %tzz.o | FileCheck --check-prefix=ZZ-NM %s
+# RUN: not ld.lld %t.o --start-lib %t1.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s
+# RUN: rm -f %tzz.a && llvm-ar rc %tzz.a %tzz.o
+# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %tzz.a -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s
+
+# ZZ-NM: f2
+# ZZ-NM: zz
+
+# ZZ:      error: relocation refers to a symbol in a discarded section: zz
+# ZZ-NEXT: >>> defined in {{.*}}zz.o
+# ZZ-NEXT: >>> section group signature: foo
+# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o
+# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1)
+
+## Don't error if the symbol which would cause "discarded section"
+## was inserted before %tzz.o
+# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o
+# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null
+# RUN: rm -f %tdef.a && llvm-ar rc %tdef.a %tdef.o
+# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null
+
+.globl _start
+_start:
+  call f1
+  call f2

diff  --git a/lld/test/ELF/i386-linkonce.s b/lld/test/ELF/i386-linkonce.s
index c06b042c7638..f7da0aed4af5 100644
--- a/lld/test/ELF/i386-linkonce.s
+++ b/lld/test/ELF/i386-linkonce.s
@@ -2,7 +2,9 @@
 // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %s -o %t.o
 // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %p/Inputs/i386-linkonce.s -o %t2.o
 // RUN: llvm-ar rcs %t2.a %t2.o
-// RUN: ld.lld %t.o %t2.a -o %t
+// RUN: not ld.lld %t.o %t2.a -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: error: relocation refers to a symbol in a discarded section: __i686.get_pc_thunk.bx
 
     .globl _start
 _start:

diff  --git a/lld/test/ELF/start-lib-comdat.s b/lld/test/ELF/start-lib-comdat.s
index 34c9934803f0..996ddb485bab 100644
--- a/lld/test/ELF/start-lib-comdat.s
+++ b/lld/test/ELF/start-lib-comdat.s
@@ -6,7 +6,7 @@
 // RUN: ld.lld -shared -o %t3 %t1.o --start-lib %t2.o --end-lib
 // RUN: llvm-readobj --symbols %t3 | FileCheck %s
 // RUN: ld.lld -shared -o %t3 --start-lib %t2.o --end-lib %t1.o
-// RUN: llvm-readobj --symbols %t3 | FileCheck %s
+// RUN: llvm-readobj --symbols %t3 | FileCheck /dev/null --implicit-check-not='Name: zed'
 
 // CHECK:      Name: zed
 // CHECK-NEXT: Value:


        


More information about the llvm-commits mailing list