[llvm-branch-commits] [lld] 78f6498 - [lld-macho] Flesh out STABS implementation

Jez Ng via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Dec 1 15:10:19 PST 2020


Author: Jez Ng
Date: 2020-12-01T15:05:21-08:00
New Revision: 78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545

URL: https://github.com/llvm/llvm-project/commit/78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545
DIFF: https://github.com/llvm/llvm-project/commit/78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545.diff

LOG: [lld-macho] Flesh out STABS implementation

This addresses a lot of the comments in {D89257}. Ideally it'd have been
done in the same diff, but the commits in between make that difficult.

This diff implements:
* N_GSYM and N_STSYM, the STABS for global and static symbols
* Has the STABS reflect the section IDs of their referent symbols
* Ensures we don't fail when encountering absolute symbols or files with
  no debug info
* Sorts STABS symbols by file to minimize the number of N_OSO entries

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D92366

Added: 
    

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/test/MachO/stabs.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 19d531cc5028..8a451d0dc217 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -76,6 +76,7 @@ using namespace lld::macho;
 
 std::vector<InputFile *> macho::inputFiles;
 std::unique_ptr<TarWriter> macho::tar;
+int InputFile::idCount = 0;
 
 // Open a given file path and return it as a memory-mapped file.
 Optional<MemoryBufferRef> macho::readFile(StringRef path) {

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index f7fd77dacff5..6ca9a40a49b0 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -65,13 +65,15 @@ class InputFile {
   std::vector<Symbol *> symbols;
   ArrayRef<llvm::MachO::section_64> sectionHeaders;
   std::vector<SubsectionMap> subsections;
+  // Provides an easy way to sort InputFiles deterministically.
+  const int id;
 
 protected:
   InputFile(Kind kind, MemoryBufferRef mb)
-      : mb(mb), fileKind(kind), name(mb.getBufferIdentifier()) {}
+      : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {}
 
   InputFile(Kind kind, const llvm::MachO::InterfaceFile &interface)
-      : fileKind(kind), name(saver.save(interface.getPath())) {}
+      : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
 
   void parseSections(ArrayRef<llvm::MachO::section_64>);
 
@@ -85,6 +87,8 @@ class InputFile {
 private:
   const Kind fileKind;
   const StringRef name;
+
+  static int idCount;
 };
 
 // .o file

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index ae66961310c5..6ba067bbc9e5 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -617,22 +617,71 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
   stabs.emplace_back(std::move(stab));
 }
 
-void SymtabSection::emitFunStabs(Defined *defined) {
-  {
-    StabsEntry stab(MachO::N_FUN);
-    stab.sect = 1;
-    stab.strx = stringTableSection.addString(defined->getName());
-    stab.value = defined->getVA();
-    stabs.emplace_back(std::move(stab));
+void SymtabSection::emitEndFunStab(Defined *defined) {
+  StabsEntry stab(MachO::N_FUN);
+  // FIXME this should be the size of the symbol. Using the section size in
+  // lieu is only correct if .subsections_via_symbols is set.
+  stab.value = defined->isec->getSize();
+  stabs.emplace_back(std::move(stab));
+}
+
+void SymtabSection::emitStabs() {
+  std::vector<Defined *> symbolsNeedingStabs;
+  for (const SymtabEntry &entry :
+       concat<SymtabEntry>(localSymbols, externalSymbols)) {
+    Symbol *sym = entry.sym;
+    if (auto *defined = dyn_cast<Defined>(sym)) {
+      if (defined->isAbsolute())
+        continue;
+      InputSection *isec = defined->isec;
+      ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file);
+      if (!file || !file->compileUnit)
+        continue;
+      symbolsNeedingStabs.push_back(defined);
+    }
   }
 
-  {
-    StabsEntry stab(MachO::N_FUN);
-    // FIXME this should be the size of the symbol. Using the section size in
-    // lieu is only correct if .subsections_via_symbols is set.
-    stab.value = defined->isec->getSize();
-    stabs.emplace_back(std::move(stab));
+  llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) {
+    return a->isec->file->id < b->isec->file->id;
+  });
+
+  // Emit STABS symbols so that dsymutil and/or the debugger can map address
+  // regions in the final binary to the source and object files from which they
+  // originated.
+  InputFile *lastFile = nullptr;
+  for (Defined *defined : symbolsNeedingStabs) {
+    InputSection *isec = defined->isec;
+    ObjFile *file = dyn_cast<ObjFile>(isec->file);
+    assert(file);
+
+    if (lastFile == nullptr || lastFile != file) {
+      if (lastFile != nullptr)
+        emitEndSourceStab();
+      lastFile = file;
+
+      emitBeginSourceStab(file->compileUnit);
+      emitObjectFileStab(file);
+    }
+
+    StabsEntry symStab;
+    symStab.sect = defined->isec->parent->index;
+    symStab.strx = stringTableSection.addString(defined->getName());
+    symStab.value = defined->getVA();
+
+    // XXX is it right to assume that all symbols in __text are function
+    // symbols?
+    if (isec->name == "__text") {
+      symStab.type = MachO::N_FUN;
+      stabs.emplace_back(std::move(symStab));
+      emitEndFunStab(defined);
+    } else {
+      symStab.type = defined->isExternal() ? MachO::N_GSYM : MachO::N_STSYM;
+      stabs.emplace_back(std::move(symStab));
+    }
   }
+
+  if (!stabs.empty())
+    emitEndSourceStab();
 }
 
 void SymtabSection::finalizeContents() {
@@ -663,41 +712,7 @@ void SymtabSection::finalizeContents() {
     }
   }
 
-  InputFile *lastFile = nullptr;
-  for (Symbol *sym : symtab->getSymbols()) {
-    // Emit STABS symbols so that dsymutil and/or the debugger can map address
-    // regions in the final binary to the source and object files from which
-    // they originated.
-    if (auto *defined = dyn_cast<Defined>(sym)) {
-      if (defined->isAbsolute())
-        continue;
-
-      InputSection *isec = defined->isec;
-      // XXX is it right to assume that all symbols in __text are function
-      // symbols?
-      if (isec->name == "__text") {
-        ObjFile *file = dyn_cast<ObjFile>(isec->file);
-        assert(file);
-        if (!file->compileUnit)
-          continue;
-
-        if (lastFile == nullptr || lastFile != file) {
-          if (lastFile != nullptr)
-            emitEndSourceStab();
-          lastFile = file;
-
-          emitBeginSourceStab(file->compileUnit);
-          emitObjectFileStab(file);
-        }
-        emitFunStabs(defined);
-      }
-      // TODO emit stabs for non-function symbols too
-    }
-  }
-
-  if (!stabs.empty())
-    emitEndSourceStab();
-
+  emitStabs();
   uint32_t symtabIndex = stabs.size();
   for (const SymtabEntry &entry :
        concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 56f4bf66ce23..0f3775b11b47 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -410,12 +410,13 @@ struct SymtabEntry {
 };
 
 struct StabsEntry {
-  uint8_t type;
+  uint8_t type = 0;
   uint32_t strx = 0;
   uint8_t sect = 0;
   uint16_t desc = 0;
   uint64_t value = 0;
 
+  StabsEntry() = default;
   explicit StabsEntry(uint8_t type) : type(type) {}
 };
 
@@ -440,7 +441,8 @@ class SymtabSection : public LinkEditSection {
   void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
   void emitEndSourceStab();
   void emitObjectFileStab(ObjFile *);
-  void emitFunStabs(Defined *);
+  void emitEndFunStab(Defined *);
+  void emitStabs();
 
   StringTableSection &stringTableSection;
   // STABS symbols are always local symbols, but we represent them with special

diff  --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index 81f1ecbc1e96..a3f1e2906d2b 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -1,44 +1,82 @@
-# REQUIRES: x86
+# REQUIRES: x86, shell
 # UNSUPPORTED: system-windows
 # RUN: split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-debug.s -o %t/no-debug.o
 ## Set modtimes of the files for deterministic test output.
 # RUN: env TZ=UTC touch -t "197001010000.16" %t/test.o
 # RUN: env TZ=UTC touch -t "197001010000.32" %t/foo.o
 # RUN: rm -f %t/foo.a
 # RUN: llvm-ar rcsU %t/foo.a %t/foo.o
 
-# RUN: %lld -lSystem %t/test.o %t/foo.o -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
+# RUN: %lld -lSystem %t/test.o %t/foo.o %t/no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN:   FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
 
 ## Check that we emit the right modtime even when the object file is in an
 ## archive.
-# RUN: %lld -lSystem %t/test.o %t/foo.a -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
+# RUN: %lld -lSystem %t/test.o %t/foo.a %t/no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN:   FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
 
 ## Check that we emit absolute paths to the object files in our OSO entries
 ## even if our inputs are relative paths.
-# RUN: cd %t && %lld -lSystem test.o foo.o -o test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
-
-# RUN: cd %t && %lld -lSystem %t/test.o %t/foo.a -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
-
-# CHECK:      0000000000000000 - 00 0000    SO /tmp/test.cpp
-# CHECK-NEXT: 0000000000000010 - 03 0001   OSO [[DIR]]/test.o
-# CHECK-NEXT: [[#%x, MAIN:]]   - 01 0000   FUN _main
-# CHECK-NEXT: 0000000000000006 - 00 0000   FUN
-# CHECK-NEXT: 0000000000000000 - 01 0000    SO
-# CHECK-NEXT: 0000000000000000 - 00 0000    SO /foo.cpp
-# CHECK-NEXT: 0000000000000020 - 03 0001   OSO [[FOO_PATH]]
-# CHECK-NEXT: [[#%x, FOO:]]    - 01 0000   FUN _foo
-# CHECK-NEXT: 0000000000000001 - 00 0000   FUN
-# CHECK-NEXT: 0000000000000000 - 01 0000    SO
-# CHECK-NEXT: [[#MAIN]]        T _main
-# CHECK-NEXT: [[#FOO]]         T _foo
+# RUN: cd %t && %lld -lSystem test.o foo.o no-debug.o -o test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN:   FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
+
+# RUN: cd %t && %lld -lSystem test.o foo.a no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN:   FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
+
+# CHECK:       Sections:
+# CHECK-NEXT:  Idx                Name
+# CHECK-NEXT:  [[#TEXT_ID:]]      __text
+# CHECK-NEXT:  [[#DATA_ID:]]      __data
+# CHECK-NEXT:  [[#MORE_DATA_ID:]] more_data
+# CHECK-NEXT:  [[#COMM_ID:]]      __common
+
+# CHECK:       0000000000000000 - 00                     0000    SO /tmp/test.cpp
+# CHECK-NEXT:  0000000000000010 - 03                     0001   OSO [[DIR]]/test.o
+# CHECK-NEXT:  [[#%x, STATIC:]] - 0[[#MORE_DATA_ID + 1]] 0000 STSYM _static_var
+# CHECK-NEXT:  [[#%x, MAIN:]]   - 0[[#TEXT_ID + 1]]      0000   FUN _main
+# CHECK-NEXT:  0000000000000006 - 00                     0000   FUN
+# CHECK-NEXT:  [[#%x, GLOB:]]   - 0[[#DATA_ID + 1]]      0000  GSYM _global_var
+# CHECK-NEXT:  [[#%x, ZERO:]]   - 0[[#COMM_ID + 1]]      0000  GSYM _zero
+# CHECK-NEXT:  0000000000000000 - 01                     0000    SO
+# CHECK-NEXT:  0000000000000000 - 00                     0000    SO /foo.cpp
+# CHECK-NEXT:  0000000000000020 - 03                     0001   OSO [[FOO_PATH]]
+# CHECK-NEXT:  [[#%x, FOO:]]    - 0[[#TEXT_ID + 1]]      0000   FUN _foo
+# CHECK-NEXT:  0000000000000001 - 00                     0000   FUN
+# CHECK-NEXT:  0000000000000000 - 01                     0000    SO
+# CHECK-NEXT:  [[#STATIC]]      s _static_var
+# CHECK-NEXT:  [[#MAIN]]        T _main
+# CHECK-NEXT:  {{[0-9af]+}}     A _abs
+# CHECK-NEXT:  [[#GLOB]]        D _global_var
+# CHECK-NEXT:  [[#ZERO]]        S _zero
+# CHECK-NEXT:  [[#FOO]]         T _foo
+# CHECK-NEXT:  {{[0-9af]+}}     T _no_debug
+# CHECK-EMPTY:
 
 #--- test.s
+
+## Make sure we don't create STABS entries for absolute symbols.
+.globl _abs
+_abs = 0x123
+
+.section __DATA, __data
+.globl _global_var
+_global_var:
+  .quad 123
+
+.section __DATA, more_data
+_static_var:
+  .quad 123
+
+.globl  _zero
+.zerofill __DATA,__common,_zero,4,2
+
 .text
 .globl  _main
 _main:
@@ -126,3 +164,10 @@ Ldebug_info_start0:
 Ldebug_info_end0:
 .subsections_via_symbols
 .section  __DWARF,__debug_line,regular,debug
+
+#--- no-debug.s
+## This file has no debug info.
+.text
+.globl _no_debug
+_no_debug:
+  ret


        


More information about the llvm-branch-commits mailing list