[llvm] c649f29 - [llvm-nm] Add --line-numbers flag
Daniel Thornburgh via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 19 14:14:47 PDT 2023
Author: Daniel Thornburgh
Date: 2023-09-19T14:14:27-07:00
New Revision: c649f29c24c9fc1502d8d53e0c96c3d24b31de1a
URL: https://github.com/llvm/llvm-project/commit/c649f29c24c9fc1502d8d53e0c96c3d24b31de1a
DIFF: https://github.com/llvm/llvm-project/commit/c649f29c24c9fc1502d8d53e0c96c3d24b31de1a.diff
LOG: [llvm-nm] Add --line-numbers flag
This parallels the binutils/BSD flag of the same name. Debugging
information is loaded to print line number information for symbols.
Defined symbols are symbolized by their section addresses, and undefined
symbols by their first text reloc with line info.
Differential Revision: https://reviews.llvm.org/D150987
Added:
llvm/test/tools/llvm-nm/X86/line-numbers.test
Modified:
llvm/docs/CommandGuide/llvm-nm.rst
llvm/docs/ReleaseNotes.rst
llvm/tools/llvm-nm/CMakeLists.txt
llvm/tools/llvm-nm/Opts.td
llvm/tools/llvm-nm/llvm-nm.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst
index 4b1290a15665f56..7067bb0a29a195b 100644
--- a/llvm/docs/CommandGuide/llvm-nm.rst
+++ b/llvm/docs/CommandGuide/llvm-nm.rst
@@ -190,6 +190,12 @@ OPTIONS
Print just the symbol names. Alias for ``--format=just-symbols``.
+.. option:: --line-numbers, -l
+
+ Use debugging information to print the filenames and line numbers where
+ symbols are defined. Undefined symbols have the location of their first
+ relocation printed instead.
+
.. option:: -m
Use Darwin format. Alias for ``--format=darwin``.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 352420e1110db84..660bb4e70a5a707 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -177,6 +177,9 @@ Changes to the LLVM tools
* llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra
information (section name) when showing symbols.
+* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
+ debugging information to print symbols' filenames and line numbers.
+
Changes to LLDB
---------------------------------
diff --git a/llvm/test/tools/llvm-nm/X86/line-numbers.test b/llvm/test/tools/llvm-nm/X86/line-numbers.test
new file mode 100644
index 000000000000000..4b9817ab5c62d4b
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/X86/line-numbers.test
@@ -0,0 +1,240 @@
+## Check that printing line numbers isn't attempted for files like bitcode,
+## which have symbols but limited/no section or debug info.
+# RUN: llvm-nm --line-numbers %p/Inputs/test.IRobj-x86_64 | FileCheck %s --check-prefix=BITCODE --match-full-lines --implicit-check-not={{.}}
+# BITCODE: ---------------- S _global_const
+# BITCODE-NEXT: ---------------- D _global_data
+# BITCODE-NEXT: ---------------- T _global_func
+# BITCODE-NEXT: ---------------- S _hidden_const
+# BITCODE-NEXT: ---------------- D _hidden_data
+# BITCODE-NEXT: ---------------- T _hidden_func
+# BITCODE-NEXT: ---------------- s _static_const
+# BITCODE-NEXT: ---------------- d _static_data
+# BITCODE-NEXT: ---------------- t _static_func
+
+## Check that various symbol types can use debug information if available to
+## print line numbers, and if unavailable, don't print anything erroneous. The
+## specific cases checked are given by the symbol names below. Other test cases
+## place requirements on the contents of the whole file, so they are kept out
+## of main.o.
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+# RUN: llvm-mc -g --filetype=obj %t/main.s -o %t/main.o
+# RUN: llvm-nm -l %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+# RUN: llvm-nm --line-numbers %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+
+# CHECK: 0000000000001234 a absolute_symbol
+# CHECK-NEXT: 0000000000000000 d data_no_dwarf
+# CHECK-NEXT: 0000000000000000 T defined_global_function [[FILENAME:.*main.s]]:4
+# CHECK-NEXT: 0000000000000001 t defined_local_function [[FILENAME]]:7
+# CHECK-NEXT: 0000000000000000 t function_no_dwarf
+# CHECK-NEXT: U undef1 [[FILENAME]]:12
+# CHECK-NEXT: U undef2 [[FILENAME]]:14
+# CHECK-NEXT: U undef_no_reloc
+# CHECK-NEXT: 0000000000000002 t undefined_references [[FILENAME]]:12
+
+## Check that in the absence of DWARF in the whole object, no line number
+## information is printed.
+# RUN: llvm-mc --filetype=obj %t/main.s -o %t/no-dwarf.o
+# RUN: llvm-nm -l %t/no-dwarf.o | FileCheck %s --check-prefix=NO-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# NO-DWARF: 0000000000001234 a absolute_symbol
+# NO-DWARF-NEXT: 0000000000000000 d data_no_dwarf
+# NO-DWARF-NEXT: 0000000000000000 T defined_global_function
+# NO-DWARF-NEXT: 0000000000000001 t defined_local_function
+# NO-DWARF-NEXT: 0000000000000000 t function_no_dwarf
+# NO-DWARF-NEXT: U undef1
+# NO-DWARF-NEXT: U undef2
+# NO-DWARF-NEXT: U undef_no_reloc
+# NO-DWARF-NEXT: 0000000000000002 t undefined_references
+
+## Check that printing line numbers for undefined values is not attempted in
+## the absence of any relocation section.
+# RUN: llvm-mc --filetype=obj %t/undef-no-reloc-sections.s -o %t/undef-no-reloc-sections.o
+# RUN: llvm-nm --line-numbers %t/undef-no-reloc-sections.o | FileCheck %s --check-prefix=UNDEF-NO-RELOC-SECTIONS --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-NO-RELOC-SECTIONS: U undef
+
+## Check that printing line numbers for undefined values does not include
+## relocations for non-text sections. This is broken out of main.s to ensure
+## that the data relocation for undef comes first.
+# RUN: llvm-mc -g --filetype=obj %t/undef-data-reloc.s -o %t/undef-data-reloc.o
+# RUN: llvm-nm --line-numbers %t/undef-data-reloc.o | FileCheck %s --check-prefix=UNDEF-DATA-RELOC --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-DATA-RELOC: 0000000000000000 r data_reloc
+# UNDEF-DATA-RELOC-NEXT: U undef
+
+## Check that line numbers can be printed for data definitions. These are broken
+## out of main.s since their DWARF cannot be generated with llvm-mc -g.
+# RUN: llvm-mc -g --filetype=obj %t/data-dwarf.s -o %t/data-dwarf.o
+# RUN: llvm-nm --line-numbers %t/data-dwarf.o | FileCheck %s --check-prefix=DATA-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# DATA-DWARF: 0000000000000000 D defined_data /tmp/tmp.c:1
+
+#--- main.s
+.text
+.globl defined_global_function
+defined_global_function:
+ ret
+
+defined_local_function:
+ ret
+
+absolute_symbol = 0x1234
+
+undefined_references:
+ nop
+ .long undef1
+ nop
+ .long undef2
+ ret
+
+# Note: llvm-mc -g produces no DWARF for data.
+.data
+data_no_dwarf:
+ .byte 0
+
+.globl undef_no_reloc
+
+# Note: llvm-mc -g does not produce DWARF for non-SHF_ALLOC sections.
+.section no_alloc_text,"x",@progbits
+function_no_dwarf:
+ ret
+
+#--- undef-no-reloc-sections.s
+.globl undef
+
+#--- undef-data-reloc.s
+.globl undef
+.rodata
+data_reloc:
+ .long undef
+
+#--- data-dwarf.s
+# char defined_data = 42
+ .text
+ .file "tmp.c"
+ .file 0 "/tmp" "/tmp/tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+ .file 1 "tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+ .type defined_data,@object # @defined_data
+ .data
+ .globl defined_data
+defined_data:
+ .byte 42 # 0x2a
+ .size defined_data, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x22 DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x1e:0xb DW_TAG_variable
+ .byte 3 # DW_AT_name
+ .long 41 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 0
+ .byte 3 # Abbrev [3] 0x29:0x4 DW_TAG_base_type
+ .byte 4 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 24 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "Debian clang version 14.0.6" # string offset=0
+.Linfo_string1:
+ .asciz "/tmp/tmp.c" # string offset=28
+.Linfo_string2:
+ .asciz "/tmp" # string offset=39
+.Linfo_string3:
+ .asciz "defined_data" # string offset=44
+.Linfo_string4:
+ .asciz "char" # string offset=57
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad defined_data
+.Ldebug_addr_end0:
+ .ident "Debian clang version 14.0.6"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/tools/llvm-nm/CMakeLists.txt b/llvm/tools/llvm-nm/CMakeLists.txt
index ec04f1e9d2343e4..5191e138d1c0bbb 100644
--- a/llvm/tools/llvm-nm/CMakeLists.txt
+++ b/llvm/tools/llvm-nm/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
Object
Option
Support
+ Symbolize
TargetParser
TextAPI
)
diff --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td
index 60ac134269b3bfb..04d9f5db5cf85d7 100644
--- a/llvm/tools/llvm-nm/Opts.td
+++ b/llvm/tools/llvm-nm/Opts.td
@@ -22,6 +22,7 @@ def export_symbols : FF<"export-symbols", "Export symbol list for all inputs">;
def extern_only : FF<"extern-only", "Show only external symbols">;
defm format : Eq<"format", "Specify output format: bsd (default), posix, sysv, darwin, just-symbols">, MetaVarName<"<format>">;
def help : FF<"help", "Display this help">;
+def line_numbers : FF<"line-numbers", "Use debugging information to print symbols' filenames and line numbers">;
def no_llvm_bc : FF<"no-llvm-bc", "Disable LLVM bitcode reader">;
def no_sort : FF<"no-sort", "Show symbols in order encountered">;
def no_weak : FF<"no-weak", "Show only non-weak symbols">;
@@ -67,6 +68,7 @@ def : JoinedOrSeparate<["-"], "f">, HelpText<"Alias for --format">, Alias<format
def : F<"h", "Alias for --help">, Alias<help>;
def : F<"g", "Alias for --extern-only">, Alias<extern_only>;
def : F<"j", "Alias for --format=just-symbols">, Alias<format_EQ>, AliasArgs<["just-symbols"]>;
+def : F<"l", "Alias for --line-numbers">, Alias<line_numbers>;
def : F<"m", "Alias for --format=darwin">, Alias<format_EQ>, AliasArgs<["darwin"]>;
def : F<"M", "Deprecated alias for --print-armap">, Alias<print_armap>, Flags<[HelpHidden]>;
def : F<"n", "Alias for --numeric-sort">, Alias<numeric_sort>;
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 9a9e8bd146bb659..051fa3e5bfa5a98 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -19,6 +19,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -97,6 +98,7 @@ static bool Demangle;
static bool DynamicSyms;
static bool ExportSymbols;
static bool ExternalOnly;
+static bool LineNumbers;
static OutputFormatTy OutputFormat;
static bool NoLLVMBitcode;
static bool NoSort;
@@ -551,8 +553,6 @@ static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
}
}
}
-
- outs() << "\n";
}
// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
@@ -689,9 +689,88 @@ static void printExportSymbolList(const std::vector<NMSymbol> &SymbolList) {
}
}
+/// Prints "\t<file>:<line>" for symbol \p S to outs() when \p Symbolizer can
+/// produce a location for it; prints nothing otherwise.
+///
+/// How the location is found depends on S.TypeChar:
+///  - 'U' (undefined): the first relocation against the symbol, in a section
+///    whose relocated section is text, for which the symbolizer returns a
+///    usable file name.
+///  - 't' / 'T': the symbol's own address, symbolized as code.
+///  - anything else: the symbol's own address, symbolized as data, using the
+///    declaration file and line.
+///
+/// Symbols that do not belong to an ObjectFile are skipped. Symbolizer errors
+/// are handed to error() together with the object's file name, and no
+/// location is printed for that symbol.
+static void printLineNumbers(symbolize::LLVMSymbolizer &Symbolizer,
+                             const NMSymbol &S) {
+  const auto *Obj = dyn_cast<ObjectFile>(S.Sym.getObject());
+  if (!Obj)
+    return;
+  const SymbolRef Sym(S.Sym);
+  // Fall back to UndefSection when the symbol is not tied to a section.
+  uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+  section_iterator Sec = cantFail(Sym.getSection());
+  if (Sec != Obj->section_end())
+    SectionIndex = Sec->getIndex();
+  object::SectionedAddress Address = {cantFail(Sym.getAddress()), SectionIndex};
+
+  std::string FileName;
+  // Every path that reaches the final print assigns Line; the initializer
+  // only guarantees it is never read while indeterminate.
+  uint32_t Line = 0;
+  switch (S.TypeChar) {
+  // For undefined symbols, find the first relocation for that symbol with a
+  // line number.
+  case 'U': {
+    for (const SectionRef RelocsSec : Obj->sections()) {
+      if (RelocsSec.relocations().empty())
+        continue;
+      SectionRef TextSec = *cantFail(RelocsSec.getRelocatedSection());
+      // Only relocations applied to text sections are considered.
+      if (!TextSec.isText())
+        continue;
+      for (const RelocationRef R : RelocsSec.relocations()) {
+        if (R.getSymbol() != Sym)
+          continue;
+        Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
+            *Obj, {TextSec.getAddress() + R.getOffset(), SectionIndex});
+        if (!ResOrErr) {
+          error(ResOrErr.takeError(), Obj->getFileName());
+          return;
+        }
+        // A relocation without line info must not end the search: skip it and
+        // keep looking for the first relocation that does have a location.
+        if (ResOrErr->FileName == DILineInfo::BadString)
+          continue;
+        FileName = std::move(ResOrErr->FileName);
+        Line = ResOrErr->Line;
+        break;
+      }
+      // Stop scanning sections as soon as a location has been found.
+      if (!FileName.empty())
+        break;
+    }
+    if (FileName.empty())
+      return;
+    break;
+  }
+  case 't':
+  case 'T': {
+    Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    if (ResOrErr->FileName == DILineInfo::BadString)
+      return;
+    FileName = std::move(ResOrErr->FileName);
+    Line = ResOrErr->Line;
+    break;
+  }
+  default: {
+    Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    // An empty declaration file means there is no location to print.
+    if (ResOrErr->DeclFile.empty())
+      return;
+    FileName = std::move(ResOrErr->DeclFile);
+    Line = ResOrErr->DeclLine;
+    break;
+  }
+  }
+  outs() << '\t' << FileName << ':' << Line;
+}
+
static void printSymbolList(SymbolicFile &Obj,
std::vector<NMSymbol> &SymbolList, bool printName,
StringRef ArchiveName, StringRef ArchitectureName) {
+ std::optional<symbolize::LLVMSymbolizer> Symbolizer;
+ if (LineNumbers)
+ Symbolizer.emplace();
+
if (!PrintFileName) {
if ((OutputFormat == bsd || OutputFormat == posix ||
OutputFormat == just_symbols) &&
@@ -798,7 +877,7 @@ static void printSymbolList(SymbolicFile &Obj,
printFormat);
} else if (OutputFormat == posix) {
outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
- << (MachO ? "0" : SymbolSizeStr) << "\n";
+ << (MachO ? "0" : SymbolSizeStr);
} else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
if (PrintAddress)
outs() << SymbolAddrStr << ' ';
@@ -819,12 +898,14 @@ static void printSymbolList(SymbolicFile &Obj,
} else
outs() << S.IndirectName << ")";
}
- outs() << "\n";
} else if (OutputFormat == sysv) {
outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "| "
<< S.TypeChar << " |" << right_justify(S.TypeName, 18) << "|"
- << SymbolSizeStr << "| |" << S.SectionName << "\n";
+ << SymbolSizeStr << "| |" << S.SectionName;
}
+ if (LineNumbers)
+ printLineNumbers(*Symbolizer, S);
+ outs() << '\n';
}
SymbolList.clear();
@@ -2415,6 +2496,7 @@ int llvm_nm_main(int argc, char **argv, const llvm::ToolContext &) {
else
error("--format value should be one of: bsd, posix, sysv, darwin, "
"just-symbols");
+ LineNumbers = Args.hasArg(OPT_line_numbers);
NoLLVMBitcode = Args.hasArg(OPT_no_llvm_bc);
NoSort = Args.hasArg(OPT_no_sort);
NoWeakSymbols = Args.hasArg(OPT_no_weak);
More information about the llvm-commits
mailing list