[llvm-branch-commits] [lld] 78f6498 - [lld-macho] Flesh out STABS implementation
Jez Ng via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 1 15:10:19 PST 2020
Author: Jez Ng
Date: 2020-12-01T15:05:21-08:00
New Revision: 78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545
URL: https://github.com/llvm/llvm-project/commit/78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545
DIFF: https://github.com/llvm/llvm-project/commit/78f6498cdcdb5a7644b1c32615cfe2fdfd9c2545.diff
LOG: [lld-macho] Flesh out STABS implementation
This addresses a lot of the comments in {D89257}. Ideally it'd have been
done in the same diff, but the commits in between make that difficult.
This diff implements:
* N_GSYM and N_STSYM, the STABS for global and static symbols
* Has the STABS reflect the section IDs of their referent symbols
* Ensures we don't fail when encountering absolute symbols or files with
no debug info
* Sorts STABS symbols by file to minimize the number of N_OSO entries
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D92366
Added:
Modified:
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/test/MachO/stabs.s
Removed:
################################################################################
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 19d531cc5028..8a451d0dc217 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -76,6 +76,7 @@ using namespace lld::macho;
std::vector<InputFile *> macho::inputFiles;
std::unique_ptr<TarWriter> macho::tar;
+int InputFile::idCount = 0;
// Open a given file path and return it as a memory-mapped file.
Optional<MemoryBufferRef> macho::readFile(StringRef path) {
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index f7fd77dacff5..6ca9a40a49b0 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -65,13 +65,15 @@ class InputFile {
std::vector<Symbol *> symbols;
ArrayRef<llvm::MachO::section_64> sectionHeaders;
std::vector<SubsectionMap> subsections;
+ // Provides an easy way to sort InputFiles deterministically.
+ const int id;
protected:
InputFile(Kind kind, MemoryBufferRef mb)
- : mb(mb), fileKind(kind), name(mb.getBufferIdentifier()) {}
+ : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {}
InputFile(Kind kind, const llvm::MachO::InterfaceFile &interface)
- : fileKind(kind), name(saver.save(interface.getPath())) {}
+ : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
void parseSections(ArrayRef<llvm::MachO::section_64>);
@@ -85,6 +87,8 @@ class InputFile {
private:
const Kind fileKind;
const StringRef name;
+
+ static int idCount;
};
// .o file
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index ae66961310c5..6ba067bbc9e5 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -617,22 +617,71 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
stabs.emplace_back(std::move(stab));
}
-void SymtabSection::emitFunStabs(Defined *defined) {
- {
- StabsEntry stab(MachO::N_FUN);
- stab.sect = 1;
- stab.strx = stringTableSection.addString(defined->getName());
- stab.value = defined->getVA();
- stabs.emplace_back(std::move(stab));
+void SymtabSection::emitEndFunStab(Defined *defined) {
+ StabsEntry stab(MachO::N_FUN);
+ // FIXME this should be the size of the symbol. Using the section size in
+ // lieu is only correct if .subsections_via_symbols is set.
+ stab.value = defined->isec->getSize();
+ stabs.emplace_back(std::move(stab));
+}
+
+void SymtabSection::emitStabs() {
+ std::vector<Defined *> symbolsNeedingStabs;
+ for (const SymtabEntry &entry :
+ concat<SymtabEntry>(localSymbols, externalSymbols)) {
+ Symbol *sym = entry.sym;
+ if (auto *defined = dyn_cast<Defined>(sym)) {
+ if (defined->isAbsolute())
+ continue;
+ InputSection *isec = defined->isec;
+ ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file);
+ if (!file || !file->compileUnit)
+ continue;
+ symbolsNeedingStabs.push_back(defined);
+ }
}
- {
- StabsEntry stab(MachO::N_FUN);
- // FIXME this should be the size of the symbol. Using the section size in
- // lieu is only correct if .subsections_via_symbols is set.
- stab.value = defined->isec->getSize();
- stabs.emplace_back(std::move(stab));
+ llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) {
+ return a->isec->file->id < b->isec->file->id;
+ });
+
+ // Emit STABS symbols so that dsymutil and/or the debugger can map address
+ // regions in the final binary to the source and object files from which they
+ // originated.
+ InputFile *lastFile = nullptr;
+ for (Defined *defined : symbolsNeedingStabs) {
+ InputSection *isec = defined->isec;
+ ObjFile *file = dyn_cast<ObjFile>(isec->file);
+ assert(file);
+
+ if (lastFile == nullptr || lastFile != file) {
+ if (lastFile != nullptr)
+ emitEndSourceStab();
+ lastFile = file;
+
+ emitBeginSourceStab(file->compileUnit);
+ emitObjectFileStab(file);
+ }
+
+ StabsEntry symStab;
+ symStab.sect = defined->isec->parent->index;
+ symStab.strx = stringTableSection.addString(defined->getName());
+ symStab.value = defined->getVA();
+
+ // XXX is it right to assume that all symbols in __text are function
+ // symbols?
+ if (isec->name == "__text") {
+ symStab.type = MachO::N_FUN;
+ stabs.emplace_back(std::move(symStab));
+ emitEndFunStab(defined);
+ } else {
+ symStab.type = defined->isExternal() ? MachO::N_GSYM : MachO::N_STSYM;
+ stabs.emplace_back(std::move(symStab));
+ }
}
+
+ if (!stabs.empty())
+ emitEndSourceStab();
}
void SymtabSection::finalizeContents() {
@@ -663,41 +712,7 @@ void SymtabSection::finalizeContents() {
}
}
- InputFile *lastFile = nullptr;
- for (Symbol *sym : symtab->getSymbols()) {
- // Emit STABS symbols so that dsymutil and/or the debugger can map address
- // regions in the final binary to the source and object files from which
- // they originated.
- if (auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->isAbsolute())
- continue;
-
- InputSection *isec = defined->isec;
- // XXX is it right to assume that all symbols in __text are function
- // symbols?
- if (isec->name == "__text") {
- ObjFile *file = dyn_cast<ObjFile>(isec->file);
- assert(file);
- if (!file->compileUnit)
- continue;
-
- if (lastFile == nullptr || lastFile != file) {
- if (lastFile != nullptr)
- emitEndSourceStab();
- lastFile = file;
-
- emitBeginSourceStab(file->compileUnit);
- emitObjectFileStab(file);
- }
- emitFunStabs(defined);
- }
- // TODO emit stabs for non-function symbols too
- }
- }
-
- if (!stabs.empty())
- emitEndSourceStab();
-
+ emitStabs();
uint32_t symtabIndex = stabs.size();
for (const SymtabEntry &entry :
concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 56f4bf66ce23..0f3775b11b47 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -410,12 +410,13 @@ struct SymtabEntry {
};
struct StabsEntry {
- uint8_t type;
+ uint8_t type = 0;
uint32_t strx = 0;
uint8_t sect = 0;
uint16_t desc = 0;
uint64_t value = 0;
+ StabsEntry() = default;
explicit StabsEntry(uint8_t type) : type(type) {}
};
@@ -440,7 +441,8 @@ class SymtabSection : public LinkEditSection {
void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
void emitEndSourceStab();
void emitObjectFileStab(ObjFile *);
- void emitFunStabs(Defined *);
+ void emitEndFunStab(Defined *);
+ void emitStabs();
StringTableSection &stringTableSection;
// STABS symbols are always local symbols, but we represent them with special
diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index 81f1ecbc1e96..a3f1e2906d2b 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -1,44 +1,82 @@
-# REQUIRES: x86
+# REQUIRES: x86, shell
# UNSUPPORTED: system-windows
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-debug.s -o %t/no-debug.o
## Set modtimes of the files for deterministic test output.
# RUN: env TZ=UTC touch -t "197001010000.16" %t/test.o
# RUN: env TZ=UTC touch -t "197001010000.32" %t/foo.o
# RUN: rm -f %t/foo.a
# RUN: llvm-ar rcsU %t/foo.a %t/foo.o
-# RUN: %lld -lSystem %t/test.o %t/foo.o -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
+# RUN: %lld -lSystem %t/test.o %t/foo.o %t/no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN: FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
## Check that we emit the right modtime even when the object file is in an
## archive.
-# RUN: %lld -lSystem %t/test.o %t/foo.a -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
+# RUN: %lld -lSystem %t/test.o %t/foo.a %t/no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN: FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
## Check that we emit absolute paths to the object files in our OSO entries
## even if our inputs are relative paths.
-# RUN: cd %t && %lld -lSystem test.o foo.o -o test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
-
-# RUN: cd %t && %lld -lSystem %t/test.o %t/foo.a -o %t/test
-# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
-
-# CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp
-# CHECK-NEXT: 0000000000000010 - 03 0001 OSO [[DIR]]/test.o
-# CHECK-NEXT: [[#%x, MAIN:]] - 01 0000 FUN _main
-# CHECK-NEXT: 0000000000000006 - 00 0000 FUN
-# CHECK-NEXT: 0000000000000000 - 01 0000 SO
-# CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp
-# CHECK-NEXT: 0000000000000020 - 03 0001 OSO [[FOO_PATH]]
-# CHECK-NEXT: [[#%x, FOO:]] - 01 0000 FUN _foo
-# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
-# CHECK-NEXT: 0000000000000000 - 01 0000 SO
-# CHECK-NEXT: [[#MAIN]] T _main
-# CHECK-NEXT: [[#FOO]] T _foo
+# RUN: cd %t && %lld -lSystem test.o foo.o no-debug.o -o test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN: FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.o
+
+# RUN: cd %t && %lld -lSystem test.o foo.a no-debug.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; llvm-nm -pa %t/test) | \
+# RUN: FileCheck %s -DDIR=%t -DFOO_PATH=%t/foo.a\(foo.o\)
+
+# CHECK: Sections:
+# CHECK-NEXT: Idx Name
+# CHECK-NEXT: [[#TEXT_ID:]] __text
+# CHECK-NEXT: [[#DATA_ID:]] __data
+# CHECK-NEXT: [[#MORE_DATA_ID:]] more_data
+# CHECK-NEXT: [[#COMM_ID:]] __common
+
+# CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp
+# CHECK-NEXT: 0000000000000010 - 03 0001 OSO [[DIR]]/test.o
+# CHECK-NEXT: [[#%x, STATIC:]] - 0[[#MORE_DATA_ID + 1]] 0000 STSYM _static_var
+# CHECK-NEXT: [[#%x, MAIN:]] - 0[[#TEXT_ID + 1]] 0000 FUN _main
+# CHECK-NEXT: 0000000000000006 - 00 0000 FUN
+# CHECK-NEXT: [[#%x, GLOB:]] - 0[[#DATA_ID + 1]] 0000 GSYM _global_var
+# CHECK-NEXT: [[#%x, ZERO:]] - 0[[#COMM_ID + 1]] 0000 GSYM _zero
+# CHECK-NEXT: 0000000000000000 - 01 0000 SO
+# CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp
+# CHECK-NEXT: 0000000000000020 - 03 0001 OSO [[FOO_PATH]]
+# CHECK-NEXT: [[#%x, FOO:]] - 0[[#TEXT_ID + 1]] 0000 FUN _foo
+# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
+# CHECK-NEXT: 0000000000000000 - 01 0000 SO
+# CHECK-NEXT: [[#STATIC]] s _static_var
+# CHECK-NEXT: [[#MAIN]] T _main
+# CHECK-NEXT: {{[0-9af]+}} A _abs
+# CHECK-NEXT: [[#GLOB]] D _global_var
+# CHECK-NEXT: [[#ZERO]] S _zero
+# CHECK-NEXT: [[#FOO]] T _foo
+# CHECK-NEXT: {{[0-9af]+}} T _no_debug
+# CHECK-EMPTY:
#--- test.s
+
+## Make sure we don't create STABS entries for absolute symbols.
+.globl _abs
+_abs = 0x123
+
+.section __DATA, __data
+.globl _global_var
+_global_var:
+ .quad 123
+
+.section __DATA, more_data
+_static_var:
+ .quad 123
+
+.globl _zero
+.zerofill __DATA,__common,_zero,4,2
+
.text
.globl _main
_main:
@@ -126,3 +164,10 @@ Ldebug_info_start0:
Ldebug_info_end0:
.subsections_via_symbols
.section __DWARF,__debug_line,regular,debug
+
+#--- no-debug.s
+## This file has no debug info.
+.text
+.globl _no_debug
+_no_debug:
+ ret
More information about the llvm-branch-commits
mailing list