[lld] c4d13f7 - [ELF] Refactor ObjFile<ELFT>::initializeSymbols to enforce the invariant: InputFile::symbols has non null entry

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 19 09:05:48 PDT 2020


Author: Fangrui Song
Date: 2020-06-19T09:05:37-07:00
New Revision: c4d13f72a6599179f34481d6d848e9fce4ba5ef4

URL: https://github.com/llvm/llvm-project/commit/c4d13f72a6599179f34481d6d848e9fce4ba5ef4
DIFF: https://github.com/llvm/llvm-project/commit/c4d13f72a6599179f34481d6d848e9fce4ba5ef4.diff

LOG: [ELF] Refactor ObjFile<ELFT>::initializeSymbols to enforce the invariant: InputFile::symbols has non null entry

Fixes PR46348.

ObjFile<ELFT>::initializeSymbols contains two symbol iteration loops:

```
for each symbol
  if non-inheriting && non-local
    fill in this->symbols[i]

for each symbol
  if local
    fill in this->symbols[i]
  else
    symbol resolution
```

Symbol resolution can trigger a duplicate symbol error which will call
InputSectionBase::getObjMsg to iterate over InputFile::symbols.  If a
non-local symbol appears after the non-local symbol being resolved
(violating ELF spec), its `this->symbols[i]` entry has not been filled
in, InputSectionBase::getObjMsg will crash due to
`dyn_cast<Defined>(nullptr)`.

To fix the bug, reorganize the two loops to ensure this->symbols is
complete before symbol resolution. This enforces the invariant:
InputFile::symbols has none null entry when InputFile::getSymbols() is called.

```
for each symbol
  if non-inheriting
    fill in this->symbols[i]

for each symbol starting from firstGlobal
  if non-local
    symbol resolution
```

Additionally, move the (non-local symbol in local part of .symtab)
diagnostic from Writer<ELFT>::copyLocalSymbols() to initializeSymbols().

Reviewed By: grimar, jhenderson

Differential Revision: https://reviews.llvm.org/D81988

Added: 
    lld/test/ELF/invalid/symtab-sh-info-dup.test

Modified: 
    lld/ELF/InputFiles.cpp
    lld/ELF/Writer.cpp
    lld/test/ELF/invalid/symtab-sh-info.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 5bbd6f0df7e9..b0bec7349eb8 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1049,51 +1049,68 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
   this->symbols.resize(eSyms.size());
 
-  // Our symbol table may have already been partially initialized
+  // Fill in InputFile::symbols. Some entries have been initialized
   // because of LazyObjFile.
-  for (size_t i = 0, end = eSyms.size(); i != end; ++i)
-    if (!this->symbols[i] && eSyms[i].getBinding() != STB_LOCAL)
-      this->symbols[i] =
-          symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this));
-
-  // Fill this->Symbols. A symbol is either local or global.
   for (size_t i = 0, end = eSyms.size(); i != end; ++i) {
+    if (this->symbols[i])
+      continue;
     const Elf_Sym &eSym = eSyms[i];
-
-    // Read symbol attributes.
     uint32_t secIdx = getSectionIndex(eSym);
     if (secIdx >= this->sections.size())
       fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
+    if (eSym.getBinding() != STB_LOCAL) {
+      if (i < firstGlobal)
+        error(toString(this) + ": non-local symbol (" + Twine(i) +
+              ") found at index < .symtab's sh_info (" + Twine(firstGlobal) +
+              ")");
+      this->symbols[i] =
+          symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this));
+      continue;
+    }
+
+    // Handle local symbols. Local symbols are not added to the symbol
+    // table because they are not visible from other object files. We
+    // allocate symbol instances and add their pointers to symbols.
+    if (i >= firstGlobal)
+      errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) +
+                  ") found at index >= .symtab's sh_info (" +
+                  Twine(firstGlobal) + ")");
 
     InputSectionBase *sec = this->sections[secIdx];
+    uint8_t type = eSym.getType();
+    if (type == STT_FILE)
+      sourceFile = CHECK(eSym.getName(this->stringTable), this);
+    if (this->stringTable.size() <= eSym.st_name)
+      fatal(toString(this) + ": invalid symbol name offset");
+    StringRefZ name = this->stringTable.data() + eSym.st_name;
+
+    if (eSym.st_shndx == SHN_UNDEF)
+      this->symbols[i] =
+          make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type);
+    else if (sec == &InputSection::discarded)
+      this->symbols[i] =
+          make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type,
+                          /*discardedSecIdx=*/secIdx);
+    else
+      this->symbols[i] = make<Defined>(this, name, STB_LOCAL, eSym.st_other,
+                                       type, eSym.st_value, eSym.st_size, sec);
+  }
+
+  // Symbol resolution of non-local symbols.
+  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
+    const Elf_Sym &eSym = eSyms[i];
     uint8_t binding = eSym.getBinding();
+    if (binding == STB_LOCAL)
+      continue; // Errored above.
+
+    uint32_t secIdx = getSectionIndex(eSym);
+    InputSectionBase *sec = this->sections[secIdx];
     uint8_t stOther = eSym.st_other;
     uint8_t type = eSym.getType();
     uint64_t value = eSym.st_value;
     uint64_t size = eSym.st_size;
     StringRefZ name = this->stringTable.data() + eSym.st_name;
 
-    // Handle local symbols. Local symbols are not added to the symbol
-    // table because they are not visible from other object files. We
-    // allocate symbol instances and add their pointers to Symbols.
-    if (binding == STB_LOCAL) {
-      if (eSym.getType() == STT_FILE)
-        sourceFile = CHECK(eSym.getName(this->stringTable), this);
-
-      if (this->stringTable.size() <= eSym.st_name)
-        fatal(toString(this) + ": invalid symbol name offset");
-
-      if (eSym.st_shndx == SHN_UNDEF)
-        this->symbols[i] = make<Undefined>(this, name, binding, stOther, type);
-      else if (sec == &InputSection::discarded)
-        this->symbols[i] = make<Undefined>(this, name, binding, stOther, type,
-                                           /*DiscardedSecIdx=*/secIdx);
-      else
-        this->symbols[i] =
-            make<Defined>(this, name, binding, stOther, type, value, size, sec);
-      continue;
-    }
-
     // Handle global undefined symbols.
     if (eSym.st_shndx == SHN_UNDEF) {
       this->symbols[i]->resolve(Undefined{this, name, binding, stOther, type});

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 9f64e4f34e12..57e006689252 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -763,9 +763,7 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
   for (InputFile *file : objectFiles) {
     ObjFile<ELFT> *f = cast<ObjFile<ELFT>>(file);
     for (Symbol *b : f->getLocalSymbols()) {
-      if (!b->isLocal())
-        fatal(toString(f) +
-              ": broken object: getLocalSymbols returns a non-local symbol");
+      assert(b->isLocal() && "should have been caught in initializeSymbols()");
       auto *dr = dyn_cast<Defined>(b);
 
       // No reason to keep local undefined symbol in symtab.

diff  --git a/lld/test/ELF/invalid/symtab-sh-info-dup.test b/lld/test/ELF/invalid/symtab-sh-info-dup.test
new file mode 100644
index 000000000000..130366beda20
--- /dev/null
+++ b/lld/test/ELF/invalid/symtab-sh-info-dup.test
@@ -0,0 +1,36 @@
+# REQUIRES: x86
+## The ELF spec says all symbols with STB_LOCAL binding precede the weak and
+## global symbols. If a local symbol is found in the non-local part of the
+## symbol table, make sure we have filled in all entries of InputFile::symbols.
+## Otherwise a null entry can lead to a null pointer dereference when iterating
+## over InputFile::symbols.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: not ld.lld %t.o %t.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK:      error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
+# CHECK-NEXT: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
+# CHECK-NEXT: error: duplicate symbol: _start
+# CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
+# CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
+
+# RUN: ld.lld --noinhibit-exec %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN
+# WARN: warning: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
+
+!ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Type:  SHT_PROGBITS
+    Name:  .text
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+Symbols:
+  - Name:    _start
+    Section: .text
+    Binding: STB_GLOBAL
+  - Name:    local
+    Section: .text
+    Binding: STB_LOCAL

diff  --git a/lld/test/ELF/invalid/symtab-sh-info.s b/lld/test/ELF/invalid/symtab-sh-info.s
index ed5d2b472bd7..253bbf9e62d5 100644
--- a/lld/test/ELF/invalid/symtab-sh-info.s
+++ b/lld/test/ELF/invalid/symtab-sh-info.s
@@ -22,8 +22,8 @@ Symbols:
 ## sh_info has value 2 what says that non-local symbol `foo` is local.
 ## Check we report this case.
 # RUN: yaml2obj --docnum=2 %s -o %t.o
-# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR2 %s
-# ERR2: broken object: getLocalSymbols returns a non-local symbol
+# RUN: not ld.lld --noinhibit-exec %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR2 %s
+# ERR2: error: {{.*}}.o: non-local symbol (1) found at index < .symtab's sh_info (2)
 
 --- !ELF
 FileHeader:


        


More information about the llvm-commits mailing list