[llvm-branch-commits] [lld] 8f91f38 - [LLD] Search archives for symbol defs to override COMMON symbols.

Sean Fertile via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 7 07:10:21 PST 2020


Author: Sean Fertile
Date: 2020-12-07T10:09:19-05:00
New Revision: 8f91f38148e84a1b3fd8b5a090e53ff5dd0258f5

URL: https://github.com/llvm/llvm-project/commit/8f91f38148e84a1b3fd8b5a090e53ff5dd0258f5
DIFF: https://github.com/llvm/llvm-project/commit/8f91f38148e84a1b3fd8b5a090e53ff5dd0258f5.diff

LOG: [LLD] Search archives for symbol defs to override COMMON symbols.

This patch changes the archive handling to enable the semantics needed
for legacy FORTRAN common blocks and block data. When we have a COMMON
definition of a symbol and are including an archive, LLD will now
search the members for global/weak defintions to override the COMMON
symbol. The previous LLD behavior (where a member would only be included
if it satisifed some other needed symbol definition) can be re-enabled with the
option '-no-fortran-common'.

Differential Revision: https://reviews.llvm.org/D86142

Added: 
    lld/test/ELF/common-archive-lookup.s

Modified: 
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/ELF/Options.td
    lld/ELF/Symbols.cpp
    lld/docs/ld.lld.1
    lld/test/ELF/warn-backrefs.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 547e290c8939..0ec4cb9a0432 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -163,6 +163,7 @@ struct Configuration {
   bool fixCortexA53Errata843419;
   bool fixCortexA8;
   bool formatBinary = false;
+  bool fortranCommon;
   bool gcSections;
   bool gdbIndex;
   bool gnuHash = false;

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 395b6200aa08..87ca200877df 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -974,6 +974,8 @@ static void readConfigs(opt::InputArgList &args) {
                                      !args.hasArg(OPT_relocatable);
   config->fixCortexA8 =
       args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
+  config->fortranCommon =
+      args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, true);
   config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
   config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
   config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);

diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index f44260959d27..3ff93c85617c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1237,6 +1237,88 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) {
   parseFile(file);
 }
 
+// The handling of tentative definitions (COMMON symbols) in archives is murky.
+// A tentative defintion will be promoted to a global definition if there are no
+// non-tentative definitions to dominate it. When we hold a tentative definition
+// to a symbol and are inspecting archive memebers for inclusion there are 2
+// ways we can proceed:
+//
+// 1) Consider the tentative definition a 'real' definition (ie promotion from
+//    tentative to real definition has already happened) and not inspect
+//    archive members for Global/Weak definitions to replace the tentative
+//    definition. An archive member would only be included if it satisfies some
+//    other undefined symbol. This is the behavior Gold uses.
+//
+// 2) Consider the tentative definition as still undefined (ie the promotion to
+//    a real definiton happens only after all symbol resolution is done).
+//    The linker searches archive memebers for global or weak definitions to
+//    replace the tentative definition with. This is the behavior used by
+//    GNU ld.
+//
+//  The second behavior is inherited from SysVR4, which based it on the FORTRAN
+//  COMMON BLOCK model. This behavior is needed for proper initalizations in old
+//  (pre F90) FORTRAN code that is packaged into an archive.
+//
+//  The following functions search archive members for defintions to replace
+//  tentative defintions (implementing behavior 2).
+static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
+                                  StringRef archiveName) {
+  IRSymtabFile symtabFile = check(readIRSymtab(mb));
+  for (const irsymtab::Reader::SymbolRef &sym :
+       symtabFile.TheReader.symbols()) {
+    if (sym.isGlobal() && sym.getName() == symName)
+      return !sym.isUndefined() && !sym.isCommon();
+  }
+  return false;
+}
+
+template <class ELFT>
+static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
+                           StringRef archiveName) {
+  ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName);
+  StringRef stringtable = obj->getStringTable();
+
+  for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
+    Expected<StringRef> name = sym.getName(stringtable);
+    if (name && name.get() == symName)
+      return sym.isDefined() && !sym.isCommon();
+  }
+  return false;
+}
+
+static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
+                           StringRef archiveName) {
+  switch (getELFKind(mb, archiveName)) {
+  case ELF32LEKind:
+    return isNonCommonDef<ELF32LE>(mb, symName, archiveName);
+  case ELF32BEKind:
+    return isNonCommonDef<ELF32BE>(mb, symName, archiveName);
+  case ELF64LEKind:
+    return isNonCommonDef<ELF64LE>(mb, symName, archiveName);
+  case ELF64BEKind:
+    return isNonCommonDef<ELF64BE>(mb, symName, archiveName);
+  default:
+    llvm_unreachable("getELFKind");
+  }
+}
+
+bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) {
+  Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member for symbol " +
+                                 toELFString(sym));
+  MemoryBufferRef mb =
+      CHECK(c.getMemoryBufferRef(),
+            toString(this) +
+                ": could not get the buffer for the member defining symbol " +
+                toELFString(sym));
+
+  if (isBitcode(mb))
+    return isBitcodeNonCommonDef(mb, sym.getName(), getName());
+
+  return isNonCommonDef(mb, sym.getName(), getName());
+}
+
 size_t ArchiveFile::getMemberCount() const {
   size_t count = 0;
   Error err = Error::success();
@@ -1771,6 +1853,13 @@ template <class ELFT> void LazyObjFile::parse() {
   }
 }
 
+bool LazyObjFile::shouldFetchForCommon(const StringRef &name) {
+  if (isBitcode(mb))
+    return isBitcodeNonCommonDef(mb, name, archiveName);
+
+  return isNonCommonDef(mb, name, archiveName);
+}
+
 std::string elf::replaceThinLTOSuffix(StringRef path) {
   StringRef suffix = config->thinLTOObjectSuffixReplace.first;
   StringRef repl = config->thinLTOObjectSuffixReplace.second;

diff  --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index e8fc68900db1..8c898280b85c 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -312,6 +312,10 @@ class LazyObjFile : public InputFile {
   template <class ELFT> void parse();
   void fetch();
 
+  // Check if a non-common symbol should be fetched to override a common
+  // definition.
+  bool shouldFetchForCommon(const StringRef &name);
+
   bool fetched = false;
 
 private:
@@ -331,6 +335,10 @@ class ArchiveFile : public InputFile {
   // more than once.)
   void fetch(const Archive::Symbol &sym);
 
+  // Check if a non-common symbol should be fetched to override a common
+  // definition.
+  bool shouldFetchForCommon(const Archive::Symbol &sym);
+
   size_t getMemberCount() const;
   size_t getFetchedMemberCount() const { return seen.size(); }
 

diff  --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c3490b6f9d7a..0254b54eca1d 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -64,6 +64,10 @@ defm optimize_bb_jumps: BB<"optimize-bb-jumps",
     "Remove direct jumps at the end to the next basic block",
     "Do not remove any direct jumps at the end to the next basic block (default)">;
 
+defm fortran_common : BB<"fortran-common",
+    "Search archive members for definitions to override COMMON symbols (default)",
+    "Do not search archive members for definitions to override COMMON symbols">;
+
 defm split_stack_adjust_size
     : Eq<"split-stack-adjust-size",
          "Specify adjustment to stack size when a split-stack function calls a "

diff  --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index a2153da2e8d8..a54f68edf952 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -689,7 +689,33 @@ void Symbol::resolveDefined(const Defined &other) {
                     other.value);
 }
 
+template <class LazyT>
+static void replaceCommon(Symbol &oldSym, const LazyT &newSym) {
+  backwardReferences.erase(&oldSym);
+  oldSym.replace(newSym);
+  newSym.fetch();
+}
+
 template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
+  // For common objects, we want to look for global or weak definitions that
+  // should be fetched as the cannonical definition instead.
+  if (isCommon() && elf::config->fortranCommon) {
+    if (auto *laSym = dyn_cast<LazyArchive>(&other)) {
+      ArchiveFile *archive = cast<ArchiveFile>(laSym->file);
+      const Archive::Symbol &archiveSym = laSym->sym;
+      if (archive->shouldFetchForCommon(archiveSym)) {
+        replaceCommon(*this, other);
+        return;
+      }
+    } else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
+      LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
+      if (obj->shouldFetchForCommon(loSym->getName())) {
+        replaceCommon(*this, other);
+        return;
+      }
+    }
+  }
+
   if (!isUndefined()) {
     // See the comment in resolveUndefined().
     if (isDefined())

diff  --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 538d09d6a5f0..1449b02c4906 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -311,6 +311,8 @@ Do not demangle symbol names.
 Inhibit output of an
 .Li .interp
 section.
+.It Fl -no-fortran-common
+Do not search archive members for definitions to override COMMON symbols.
 .It Fl -no-gc-sections
 Disable garbage collection of unused sections.
 .It Fl -no-gnu-unique

diff  --git a/lld/test/ELF/common-archive-lookup.s b/lld/test/ELF/common-archive-lookup.s
new file mode 100644
index 000000000000..c33ff86973ef
--- /dev/null
+++ b/lld/test/ELF/common-archive-lookup.s
@@ -0,0 +1,182 @@
+# REQUIRES: ppc
+
+# RUN: rm -rf %t.dir
+# RUN: split-file %s %t.dir
+# RUN: cd %t.dir
+
+## Object files.
+# RUN: llvm-mc -triple=powerpc64le -filetype=obj ref.s -o %t1.o
+# RUN: llvm-mc -triple=powerpc64le -filetype=obj refanddef.s -o %t2.o
+# RUN: llvm-mc -triple=powerpc64le -filetype=obj def.s -o %tstrong_data_only.o
+# RUN: llvm-mc -triple=powerpc64le -filetype=obj weak.s -o %tweak_data_only.o
+
+# RUN: llvm-mc -triple=powerpc64le -filetype=obj main.s -o %tmain.o
+
+## Object file archives.
+# RUN: llvm-ar crs %t1.a %t1.o %tstrong_data_only.o
+# RUN: llvm-ar crs %t2.a %t1.o %tweak_data_only.o
+# RUN: llvm-ar crs %t3.a %t2.o %tstrong_data_only.o
+
+## Bitcode files.
+# RUN: llvm-as -o %t1.bc commonblock.ll
+# RUN: llvm-as -o %t2.bc blockdata.ll
+
+## Bitcode archive.
+# RUN: llvm-ar crs %t4.a %t1.bc %t2.bc
+
+# RUN: ld.lld -o %t1 %tmain.o %t1.a
+# RUN: llvm-objdump -D -j .data %t1 | FileCheck --check-prefix=TEST1 %s
+
+# RUN: ld.lld -o %t2 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
+# RUN: llvm-objdump -D -j .data %t2 | FileCheck --check-prefix=TEST1 %s
+
+# RUN: ld.lld -o %t3 %tmain.o %t2.a
+# RUN: llvm-objdump -D -j .data %t3 | FileCheck --check-prefix=TEST1 %s
+
+# RUN: ld.lld -o %t4 %tmain.o --start-lib %t1.o %tweak_data_only.o --end-lib
+# RUN: llvm-objdump -D -j .data %t4 | FileCheck --check-prefix=TEST1 %s
+
+# RUN: ld.lld -o %t5 %tmain.o %t3.a --print-map | FileCheck --check-prefix=MAP %s
+
+# RUN: ld.lld -o %t6 %tmain.o %t2.o %t1.a
+# RUN: llvm-objdump -D -j .data %t6 | FileCheck --check-prefix=TEST2 %s
+
+# RUN: ld.lld -o %t7 %tmain.o %t2.o --start-lib %t1.o %tstrong_data_only.o --end-lib
+# RUN: llvm-objdump -D -j .data %t7 | FileCheck --check-prefix=TEST2 %s
+
+# RUN: not ld.lld -o %t8 %tmain.o %t1.a %tstrong_data_only.o 2>&1 | \
+# RUN:   FileCheck --check-prefix=ERR %s
+
+# RUN: not ld.lld -o %t9 %tmain.o --start-lib %t1.o %t2.o --end-lib  %tstrong_data_only.o 2>&1 | \
+# RUN:   FileCheck --check-prefix=ERR %s
+
+# ERR: ld.lld: error: duplicate symbol: block
+
+# RUN: ld.lld --no-fortran-common -o %t10 %tmain.o %t1.a
+# RUN: llvm-readobj --syms %t10 | FileCheck --check-prefix=NFC %s
+
+# RUN: ld.lld --no-fortran-common -o %t11 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib
+# RUN: llvm-readobj --syms %t11 | FileCheck --check-prefix=NFC %s
+
+# RUN: ld.lld -o - %tmain.o %t4.a --lto-emit-asm | FileCheck --check-prefix=ASM %s
+
+# RUN: ld.lld -o - %tmain.o  --start-lib %t1.bc %t2.bc --end-lib --lto-emit-asm | \
+# RUN:   FileCheck --check-prefix=ASM %s
+
+## Old FORTRAN that mixes use of COMMON blocks and BLOCK DATA requires that we
+## search through archives for non-tentative definitions (from the BLOCK DATA)
+## to replace the tentative definitions (from the COMMON block(s)).
+
+## Ensure we have used the initialized definition of 'block' instead of a
+## common definition.
+# TEST1-LABEL:  Disassembly of section .data:
+# TEST1:          <block>:
+# TEST1-NEXT:       ea 2e 44 54
+# TEST1-NEXT:       fb 21 09 40
+# TEST1-NEXT:       ...
+
+
+# NFC:       Name: block
+# NFC-NEXT:  Value:
+# NFC-NEXT:  Size: 40
+# NFC-NEXT:  Binding: Global (0x1)
+# NFC-NEXT:  Type: Object (0x1)
+# NFC-NEXT:  Other: 0
+# NFC-NEXT:  Section: .bss
+
+## Expecting the strong definition from the object file, and the defintions from
+## the archive do not interfere.
+# TEST2-LABEL: Disassembly of section .data:
+# TEST2:         <block>:
+# TEST2-NEXT:     03 57 14 8b
+# TEST2-NEXT:     0a bf 05 40
+# TEST2-NEXT:     ...
+
+# MAP:       28 8 {{.*}}tmp3.a(common-archive-lookup.s.tmp2.o):(.data)
+# MAP-NEXT:  28 1 block
+
+# ASM:         .type   block, at object
+# ASM:       block:
+# ASM-NEXT:    .long 5
+# ASM:         .size   block, 20
+
+#--- ref.s
+  .text
+  .abiversion 2
+  .global bar
+  .type bar, at function
+bar:
+  addis 4, 2, block at toc@ha
+  addi  4, 4, block at toc@l
+
+## Tentative definition of 'block'.
+  .comm block,40,8
+
+#--- refanddef.s
+## An alternate strong definition of block, in the same file as
+## a 
diff erent referenced symbol.
+  .text
+  .abiversion 2
+  .global bar
+  .type bar, at function
+bar:
+  addis 4, 2, block at toc@ha
+  addi  4, 4, block at toc@l
+
+  .data
+  .type block, at object
+  .global block
+  .p2align 3
+block:
+  .quad   0x4005bf0a8b145703              # double 2.7182818284589998
+  .space  32
+  .size   block, 40
+
+#--- def.s
+## Strong definition of 'block'.
+  .data
+  .type block, at object
+  .global block
+  .p2align 3
+block:
+  .quad   0x400921fb54442eea              # double 3.1415926535900001
+  .space  32
+  .size   block, 40
+
+#--- weak.s
+## Weak definition of `block`.
+  .data
+  .type block, at object
+  .weak block
+  .p2align 3
+block:
+  .quad   0x400921fb54442eea              # double 3.1415926535900001
+  .space  32
+  .size   block, 40
+
+#--- main.s
+  .global _start
+_start:
+  bl bar
+  blr
+
+
+#--- blockdata.ll
+target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+ at block = dso_local local_unnamed_addr global [5 x i32] [i32 5, i32 0, i32 0, i32 0, i32 0], align 4
+
+#--- commonblock.ll
+target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+ at block =  common dso_local local_unnamed_addr global [5 x i32] zeroinitializer, align 4
+
+define dso_local i32 @bar(i32 signext %i) local_unnamed_addr {
+entry:
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @block, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 8
+  ret i32 %0
+}

diff  --git a/lld/test/ELF/warn-backrefs.s b/lld/test/ELF/warn-backrefs.s
index 6912955700f9..9a43e973315d 100644
--- a/lld/test/ELF/warn-backrefs.s
+++ b/lld/test/ELF/warn-backrefs.s
@@ -66,6 +66,18 @@
 # RUN: echo '.weak foo; foo:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tweak.o
 # RUN: ld.lld --fatal-warnings --warn-backrefs --start-lib %tweak.o --end-lib %t1.o %t2.o -o /dev/null
 
+## Check common symbols. A common sym might later be replaced by a non-common definition.
+# RUN: echo '.comm obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tcomm.o
+# RUN: echo '.type obj, at object; .data; .globl obj; .p2align 2; obj: .long 55; .size obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tstrong.o
+# RUN: echo '.globl foo; foo: movl obj(%rip), %eax' | llvm-mc -triple=x86_64 -filetype=obj -o %t5.o
+# RUN: llvm-ar rcs %tcomm.a %tcomm.o
+# RUN: llvm-ar rcs %tstrong.a %tstrong.o
+# RUN: ld.lld --warn-backrefs %tcomm.a %t1.o %t5.o 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s
+# RUN: ld.lld --fatal-warnings --warn-backrefs %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null
+# RUN: ld.lld --warn-backrefs --no-fortran-common %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s
+
+# COMM: ld.lld: warning: backward reference detected: obj in {{.*}}5.o refers to {{.*}}comm.a
+
 ## If a lazy definition appears after the backward reference, don't warn.
 ## A traditional Unix linker will resolve the reference to the later definition.
 # RUN: ld.lld --fatal-warnings --warn-backrefs %t2.a %t1.o %t2.a -o /dev/null


        


More information about the llvm-branch-commits mailing list