[lld] 88d66f6 - [ELF] Move duplicate symbol check after input file parsing

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 22 10:08:02 PST 2022


Author: Fangrui Song
Date: 2022-02-22T10:07:58-08:00
New Revision: 88d66f6ed1e5a3a9370a3181b7307fe65590e3ac

URL: https://github.com/llvm/llvm-project/commit/88d66f6ed1e5a3a9370a3181b7307fe65590e3ac
DIFF: https://github.com/llvm/llvm-project/commit/88d66f6ed1e5a3a9370a3181b7307fe65590e3ac.diff

LOG: [ELF] Move duplicate symbol check after input file parsing

https://discourse.llvm.org/t/parallel-input-file-parsing/60164

To decouple symbol initialization and section initialization, `Defined::section`
assignment should be postponed after input file parsing. To avoid spurious
duplicate definition error due to two definitions in COMDAT groups of the same
signature, we should postpone the duplicate symbol check.

The function is called postScan instead of a more specific name like
checkDuplicateSymbols, because we may merge Symbol::mergeProperties into
postScan. It is placed after compileBitcodeFiles to apply to ET_REL files
produced by LTO. This causes minor diagnostic regression
for skipLinkedOutput configurations: ld.lld --thinlto-index-only a.bc b.o
(bitcode definition prevails) won't detect duplicate symbol error. I think this
is an acceptable compromise. The important cases where (a) both files are
bitcode or (b) --thinlto-index-only is unused are still detected.

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D119908

Added: 
    

Modified: 
    lld/ELF/Driver.cpp
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/ELF/SymbolTable.cpp
    lld/ELF/SymbolTable.h
    lld/ELF/Symbols.cpp
    lld/ELF/Symbols.h
    lld/test/ELF/invalid/symtab-sh-info-dup.test
    lld/test/ELF/lto/duplicated.ll
    lld/test/ELF/vs-diagnostics-duplicate.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 4910a7d5a1633..cfc99dd115dc1 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2118,6 +2118,8 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
       map.try_emplace(sym, sym2);
       // If both foo at v1 and foo@@v1 are defined and non-weak, report a duplicate
       // definition error.
+      if (sym->isDefined())
+        sym2->checkDuplicate(cast<Defined>(*sym));
       sym2->resolve(*sym);
       // Eliminate foo at v1 from the symbol table.
       sym->symbolKind = Symbol::PlaceholderKind;
@@ -2223,6 +2225,25 @@ static uint32_t getAndFeatures() {
   return ret;
 }
 
+static void postParseObjectFile(ELFFileBase *file) {
+  switch (config->ekind) {
+  case ELF32LEKind:
+    cast<ObjFile<ELF32LE>>(file)->postParse();
+    break;
+  case ELF32BEKind:
+    cast<ObjFile<ELF32BE>>(file)->postParse();
+    break;
+  case ELF64LEKind:
+    cast<ObjFile<ELF64LE>>(file)->postParse();
+    break;
+  case ELF64BEKind:
+    cast<ObjFile<ELF64BE>>(file)->postParse();
+    break;
+  default:
+    llvm_unreachable("");
+  }
+}
+
 // Do actual linking. Note that when this function is called,
 // all linker scripts have already been parsed.
 void LinkerDriver::link(opt::InputArgList &args) {
@@ -2340,6 +2361,11 @@ void LinkerDriver::link(opt::InputArgList &args) {
     for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
       handleLibcall(s);
 
+  // No more lazy bitcode can be extracted at this point. Do post parse work
+  // like checking duplicate symbols.
+  parallelForEach(objectFiles, postParseObjectFile);
+  parallelForEach(bitcodeFiles, [](BitcodeFile *file) { file->postParse(); });
+
   // Return if there were name resolution errors.
   if (errorCount())
     return;
@@ -2393,6 +2419,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
   //
   // With this the symbol table should be complete. After this, no new names
   // except a few linker-synthesized ones will be added to the symbol table.
+  const size_t numObjsBeforeLTO = objectFiles.size();
   invokeELFT(compileBitcodeFiles, skipLinkedOutput);
 
   // Symbol resolution finished. Report backward reference problems.
@@ -2404,6 +2431,11 @@ void LinkerDriver::link(opt::InputArgList &args) {
   if (skipLinkedOutput)
     return;
 
+  // compileBitcodeFiles may have produced lto.tmp object files. After this, no
+  // more file will be added.
+  auto newObjectFiles = makeArrayRef(objectFiles).slice(numObjsBeforeLTO);
+  parallelForEach(newObjectFiles, postParseObjectFile);
+
   // Handle --exclude-libs again because lto.tmp may reference additional
   // libcalls symbols defined in an excluded archive. This may override
   // versionId set by scanVersionScript().

diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index e4509c1f78880..e4de1a3462823 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1149,6 +1149,33 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
   }
 }
 
+// Called after all ObjFile::parse is called for all ObjFiles. This checks
+// duplicate symbols and may do symbol property merge in the future.
+template <class ELFT> void ObjFile<ELFT>::postParse() {
+  ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
+  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
+    const Elf_Sym &eSym = eSyms[i];
+    const Symbol &sym = *symbols[i];
+    // !sym.file allows a symbol assignment redefines a symbol without an error.
+    if (sym.file == this || !sym.file || !sym.isDefined() ||
+        eSym.st_shndx == SHN_UNDEF || eSym.st_shndx == SHN_COMMON ||
+        eSym.getBinding() == STB_WEAK)
+      continue;
+    uint32_t secIdx = eSym.st_shndx;
+    if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
+      secIdx = cantFail(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
+    else if (secIdx >= SHN_LORESERVE)
+      secIdx = 0;
+    if (sections[secIdx] == &InputSection::discarded)
+      continue;
+    // Allow absolute symbols with the same value for GNU ld compatibility.
+    if (!cast<Defined>(sym).section && !sections[secIdx] &&
+        cast<Defined>(sym).value == eSym.st_value)
+      continue;
+    reportDuplicate(sym, this, sections[secIdx], eSym.st_value);
+  }
+}
+
 // The handling of tentative definitions (COMMON symbols) in archives is murky.
 // A tentative definition will be promoted to a global definition if there are
 // no non-tentative definitions to dominate it. When we hold a tentative
@@ -1617,7 +1644,6 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
 }
 
 template <class ELFT> void BitcodeFile::parse() {
-  std::vector<bool> keptComdats;
   for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
     keptComdats.push_back(
         s.second == Comdat::NoDeduplicate ||
@@ -1646,6 +1672,20 @@ void BitcodeFile::parseLazy() {
     }
 }
 
+void BitcodeFile::postParse() {
+  for (auto it : llvm::enumerate(obj->symbols())) {
+    const Symbol &sym = *symbols[it.index()];
+    const auto &objSym = it.value();
+    if (sym.file == this || !sym.isDefined() || objSym.isUndefined() ||
+        objSym.isCommon() || objSym.isWeak())
+      continue;
+    int c = objSym.getComdatIndex();
+    if (c != -1 && !keptComdats[c])
+      continue;
+    reportDuplicate(sym, this, nullptr, 0);
+  }
+}
+
 void BinaryFile::parse() {
   ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
   auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
@@ -1663,12 +1703,15 @@ void BinaryFile::parse() {
 
   llvm::StringSaver &saver = lld::saver();
 
-  symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL,
-                            STV_DEFAULT, STT_OBJECT, 0, 0, section});
-  symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL,
-                            STV_DEFAULT, STT_OBJECT, data.size(), 0, section});
-  symtab->addSymbol(Defined{nullptr, saver.save(s + "_size"), STB_GLOBAL,
-                            STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr});
+  symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"),
+                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0,
+                                       0, section});
+  symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"),
+                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
+                                       data.size(), 0, section});
+  symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"),
+                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
+                                       data.size(), 0, nullptr});
 }
 
 InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,

diff  --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index bbd3072c54d5f..e2aba8e6250cf 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -273,6 +273,8 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   // Get cached DWARF information.
   DWARFCache *getDwarf();
 
+  void postParse();
+
 private:
   void initializeSections(bool ignoreComdats,
                           const llvm::object::ELFFile<ELFT> &obj);
@@ -315,7 +317,9 @@ class BitcodeFile : public InputFile {
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
   template <class ELFT> void parse();
   void parseLazy();
+  void postParse();
   std::unique_ptr<llvm::lto::InputFile> obj;
+  std::vector<bool> keptComdats;
 };
 
 // .so file.

diff  --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 2cfc5fcd19b3b..bb948218a2364 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -102,6 +102,16 @@ Symbol *SymbolTable::addSymbol(const Symbol &newSym) {
   return sym;
 }
 
+// This variant of addSymbol is used by BinaryFile::parse to check duplicate
+// symbol errors.
+Symbol *SymbolTable::addAndCheckDuplicate(const Defined &newSym) {
+  Symbol *sym = insert(newSym.getName());
+  if (sym->isDefined())
+    sym->checkDuplicate(newSym);
+  sym->resolve(newSym);
+  return sym;
+}
+
 Symbol *SymbolTable::find(StringRef name) {
   auto it = symMap.find(CachedHashStringRef(name));
   if (it == symMap.end())

diff  --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 60bd10a0b168a..e55daffe8ba46 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -39,6 +39,7 @@ class SymbolTable {
   Symbol *insert(StringRef name);
 
   Symbol *addSymbol(const Symbol &newSym);
+  Symbol *addAndCheckDuplicate(const Defined &newSym);
 
   void scanVersionScript();
 

diff  --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 972c70d53af8e..9b231ba46a561 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -572,21 +572,11 @@ int Symbol::compare(const Symbol *other) const {
     return -1;
   }
 
-  auto *oldSym = cast<Defined>(this);
-  auto *newSym = cast<Defined>(other);
-
-  if (isa_and_nonnull<BitcodeFile>(other->file))
-    return 0;
-
-  if (!oldSym->section && !newSym->section && oldSym->value == newSym->value &&
-      newSym->binding == STB_GLOBAL)
-    return -1;
-
   return 0;
 }
 
-static void reportDuplicate(const Symbol &sym, InputFile *newFile,
-                            InputSectionBase *errSec, uint64_t errOffset) {
+void elf::reportDuplicate(const Symbol &sym, InputFile *newFile,
+                          InputSectionBase *errSec, uint64_t errOffset) {
   if (config->allowMultipleDefinition)
     return;
   const Defined *d = cast<Defined>(&sym);
@@ -619,6 +609,13 @@ static void reportDuplicate(const Symbol &sym, InputFile *newFile,
   error(msg);
 }
 
+void Symbol::checkDuplicate(const Defined &other) const {
+  if (compare(&other) == 0)
+    reportDuplicate(*this, other.file,
+                    dyn_cast_or_null<InputSectionBase>(other.section),
+                    other.value);
+}
+
 void Symbol::resolveCommon(const CommonSymbol &other) {
   int cmp = compare(&other);
   if (cmp < 0)
@@ -653,10 +650,6 @@ void Symbol::resolveDefined(const Defined &other) {
   int cmp = compare(&other);
   if (cmp > 0)
     replace(other);
-  else if (cmp == 0)
-    reportDuplicate(*this, other.file,
-                    dyn_cast_or_null<InputSectionBase>(other.section),
-                    other.value);
 }
 
 template <class LazyT>

diff  --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 6cd8370317124..dd245660d13d5 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -213,6 +213,8 @@ class Symbol {
   // non-lazy object causes a runtime error.
   void extract() const;
 
+  void checkDuplicate(const Defined &other) const;
+
 private:
   void resolveUndefined(const Undefined &other);
   void resolveCommon(const CommonSymbol &other);
@@ -569,6 +571,8 @@ template <typename... T> Defined *makeDefined(T &&...args) {
       Defined(std::forward<T>(args)...);
 }
 
+void reportDuplicate(const Symbol &sym, InputFile *newFile,
+                     InputSectionBase *errSec, uint64_t errOffset);
 void maybeWarnUnorderableSymbol(const Symbol *sym);
 bool computeIsPreemptible(const Symbol &sym);
 void reportBackrefs();

diff  --git a/lld/test/ELF/invalid/symtab-sh-info-dup.test b/lld/test/ELF/invalid/symtab-sh-info-dup.test
index 36c7af0d66c6f..8b0833d67bbb2 100644
--- a/lld/test/ELF/invalid/symtab-sh-info-dup.test
+++ b/lld/test/ELF/invalid/symtab-sh-info-dup.test
@@ -9,11 +9,11 @@
 # RUN: not ld.lld %t.o %t.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK:      error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
+# CHECK-NEXT: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
 # CHECK-NEXT: error: duplicate symbol: _start
 # CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
 # CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
 # CHECK-EMPTY:
-# CHECK-NEXT: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
 
 # RUN: ld.lld --noinhibit-exec %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN
 # WARN: warning: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)

diff  --git a/lld/test/ELF/lto/duplicated.ll b/lld/test/ELF/lto/duplicated.ll
index 60bdb4455ee6b..6b09260edf09b 100644
--- a/lld/test/ELF/lto/duplicated.ll
+++ b/lld/test/ELF/lto/duplicated.ll
@@ -8,17 +8,18 @@
 ;; --thinlto-index-only skips some passes. Test the error is present.
 ; RUN: not ld.lld %t/a.bc %t/a.bc --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s
 ; RUN: not ld.lld %t/b.o %t/a.bc --lto-emit-asm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2
+; RUN: not ld.lld %t/a.bc %t/b.o --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2
 
 ;; --undefined-glob g extracts %t/c.bc which causes a duplicate symbol error.
-; RUN: not ld.lld %t/a.bc --start-lib %t/c.bc --undefined-glob g --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK
+; RUN: not ld.lld %t/a.bc --start-lib %t/c.bc --undefined-glob g --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s
 
 ; CHECK:      duplicate symbol: f
 ; CHECK-NEXT: >>> defined in {{.*}}.bc
 ; CHECK-NEXT: >>> defined in {{.*}}.bc
 
 ; CHECK2:      duplicate symbol: f
-; CHECK2-NEXT: >>> defined in {{.*}}.o
-; CHECK2-NEXT: >>> defined in {{.*}}.bc
+; CHECK2-NEXT: >>> defined in {{.*}}
+; CHECK2-NEXT: >>> defined in {{.*}}
 
 ;--- a.ll
 target triple = "x86_64-unknown-linux-gnu"

diff  --git a/lld/test/ELF/vs-diagnostics-duplicate.s b/lld/test/ELF/vs-diagnostics-duplicate.s
index cf4637fded9d8..397e14ac68cb2 100644
--- a/lld/test/ELF/vs-diagnostics-duplicate.s
+++ b/lld/test/ELF/vs-diagnostics-duplicate.s
@@ -2,7 +2,7 @@
 // RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
 // RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate2.s -o %t2.o
 // RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate3.s -o %t3.o
-// RUN: not ld.lld --vs-diagnostics %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
+// RUN: not ld.lld --vs-diagnostics --threads=1 %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
 
 // Case 1. The source locations are unknown for both symbols.
 // CHECK:      {{.*}}ld.lld{{.*}}: error: duplicate symbol: foo


        


More information about the llvm-commits mailing list