[llvm] c649f29 - [llvm-nm] Add --line-numbers flag

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 19 14:14:47 PDT 2023


Author: Daniel Thornburgh
Date: 2023-09-19T14:14:27-07:00
New Revision: c649f29c24c9fc1502d8d53e0c96c3d24b31de1a

URL: https://github.com/llvm/llvm-project/commit/c649f29c24c9fc1502d8d53e0c96c3d24b31de1a
DIFF: https://github.com/llvm/llvm-project/commit/c649f29c24c9fc1502d8d53e0c96c3d24b31de1a.diff

LOG: [llvm-nm] Add --line-numbers flag

This parallels the binutils/BSD flag of the same name. Debugging
information is loaded to print line number information for symbols.
Defined symbols are symbolized by their section addresses, and undefined
symbols by their first text reloc with line info.

Differential Revision: https://reviews.llvm.org/D150987

Added: 
    llvm/test/tools/llvm-nm/X86/line-numbers.test

Modified: 
    llvm/docs/CommandGuide/llvm-nm.rst
    llvm/docs/ReleaseNotes.rst
    llvm/tools/llvm-nm/CMakeLists.txt
    llvm/tools/llvm-nm/Opts.td
    llvm/tools/llvm-nm/llvm-nm.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst
index 4b1290a15665f56..7067bb0a29a195b 100644
--- a/llvm/docs/CommandGuide/llvm-nm.rst
+++ b/llvm/docs/CommandGuide/llvm-nm.rst
@@ -190,6 +190,12 @@ OPTIONS
 
  Print just the symbol names. Alias for `--format=just-symbols``.
 
+.. option:: --line-numbers, -l
+
+ Use debugging information to print the filenames and line numbers where
+ symbols are defined. Undefined symbols have the location of their first
+ relocation printed instead.
+
 .. option:: -m
 
  Use Darwin format. Alias for ``--format=darwin``.

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 352420e1110db84..660bb4e70a5a707 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -177,6 +177,9 @@ Changes to the LLVM tools
 * llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra
   information (section name) when showing symbols.
 
+* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
+  debugging information to print symbols' filenames and line numbers.
+
 Changes to LLDB
 ---------------------------------
 

diff  --git a/llvm/test/tools/llvm-nm/X86/line-numbers.test b/llvm/test/tools/llvm-nm/X86/line-numbers.test
new file mode 100644
index 000000000000000..4b9817ab5c62d4b
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/X86/line-numbers.test
@@ -0,0 +1,240 @@
+## Check that printing line numbers isn't attempted for files like bitcode,
+## which have symbols but limited/no section or debug info.
+# RUN: llvm-nm --line-numbers %p/Inputs/test.IRobj-x86_64 | FileCheck %s --check-prefix=BITCODE --match-full-lines --implicit-check-not={{.}}
+# BITCODE:      ---------------- S _global_const
+# BITCODE-NEXT: ---------------- D _global_data
+# BITCODE-NEXT: ---------------- T _global_func
+# BITCODE-NEXT: ---------------- S _hidden_const
+# BITCODE-NEXT: ---------------- D _hidden_data
+# BITCODE-NEXT: ---------------- T _hidden_func
+# BITCODE-NEXT: ---------------- s _static_const
+# BITCODE-NEXT: ---------------- d _static_data
+# BITCODE-NEXT: ---------------- t _static_func
+
+## Check that various symbol types can use debug information if available to
+## print line numbers, and if unavailable, don't print anything erroneous. The
+## specific cases checked are given by the symbol names below. Other test cases
+## place requirements on the contents of the whole file, so they are kept out
+## of main.o.
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+# RUN: llvm-mc -g --filetype=obj %t/main.s -o %t/main.o
+# RUN: llvm-nm -l %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+# RUN: llvm-nm --line-numbers %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+
+# CHECK:      0000000000001234 a absolute_symbol
+# CHECK-NEXT: 0000000000000000 d data_no_dwarf
+# CHECK-NEXT: 0000000000000000 T defined_global_function [[FILENAME:.*main.s]]:4
+# CHECK-NEXT: 0000000000000001 t defined_local_function [[FILENAME]]:7
+# CHECK-NEXT: 0000000000000000 t function_no_dwarf
+# CHECK-NEXT:                  U undef1 [[FILENAME]]:12
+# CHECK-NEXT:                  U undef2 [[FILENAME]]:14
+# CHECK-NEXT:                  U undef_no_reloc
+# CHECK-NEXT: 0000000000000002 t undefined_references [[FILENAME]]:12
+
+## Check that in the absence of DWARF in the whole object, no line number
+## information is printed.
+# RUN: llvm-mc --filetype=obj %t/main.s -o %t/no-dwarf.o
+# RUN: llvm-nm -l %t/no-dwarf.o | FileCheck %s --check-prefix=NO-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# NO-DWARF:      0000000000001234 a absolute_symbol
+# NO-DWARF-NEXT: 0000000000000000 d data_no_dwarf
+# NO-DWARF-NEXT: 0000000000000000 T defined_global_function
+# NO-DWARF-NEXT: 0000000000000001 t defined_local_function
+# NO-DWARF-NEXT: 0000000000000000 t function_no_dwarf
+# NO-DWARF-NEXT:                  U undef1
+# NO-DWARF-NEXT:                  U undef2
+# NO-DWARF-NEXT:                  U undef_no_reloc
+# NO-DWARF-NEXT: 0000000000000002 t undefined_references
+
+## Check that printing line numbers for undefined values is not attempted in
+## the absence of any relocation section.
+# RUN: llvm-mc --filetype=obj %t/undef-no-reloc-sections.s -o %t/undef-no-reloc-sections.o
+# RUN: llvm-nm --line-numbers %t/undef-no-reloc-sections.o | FileCheck %s --check-prefix=UNDEF-NO-RELOC-SECTIONS --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-NO-RELOC-SECTIONS: U undef
+
+## Check that printing line numbers for undefined values does not include
+## relocations for non-text sections. This is broken out of main.s to ensure
+## that the data relocation for undef comes first.
+# RUN: llvm-mc -g --filetype=obj %t/undef-data-reloc.s -o %t/undef-data-reloc.o
+# RUN: llvm-nm --line-numbers %t/undef-data-reloc.o | FileCheck %s --check-prefix=UNDEF-DATA-RELOC --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-DATA-RELOC:      0000000000000000 r data_reloc
+# UNDEF-DATA-RELOC-NEXT:                  U undef
+
+## Check that line numbers can be printed for data definitions. These are broken
+## out of main.s since their DWARF cannot be generated with llvm-mc -g.
+# RUN: llvm-mc -g --filetype=obj %t/data-dwarf.s -o %t/data-dwarf.o
+# RUN: llvm-nm --line-numbers %t/data-dwarf.o | FileCheck %s --check-prefix=DATA-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# DATA-DWARF: 0000000000000000 D defined_data /tmp/tmp.c:1
+
+#--- main.s
+.text
+.globl defined_global_function
+defined_global_function:
+  ret
+
+defined_local_function:
+  ret
+
+absolute_symbol = 0x1234
+
+undefined_references:
+  nop
+  .long undef1
+  nop
+  .long undef2
+  ret
+
+# Note: llvm-mc -g produces no DWARF for data.
+.data
+data_no_dwarf:
+  .byte 0
+
+.globl undef_no_reloc
+
+# Note: llvm-mc -g does not produce DWARF for non-SHF_ALLOC sections.
+.section no_alloc_text,"x",@progbits
+function_no_dwarf:
+  ret
+
+#--- undef-no-reloc-sections.s
+.globl undef
+
+#--- undef-data-reloc.s
+.globl undef
+.rodata
+data_reloc:
+  .long undef
+
+#--- data-dwarf.s
+# char defined_data = 42
+	.text
+	.file	"tmp.c"
+	.file	0 "/tmp" "/tmp/tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+	.file	1 "tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+	.type	defined_data,@object            # @defined_data
+	.data
+	.globl	defined_data
+defined_data:
+	.byte	42                              # 0x2a
+	.size	defined_data, 1
+
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	5                               # DWARF version number
+	.byte	1                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	1                               # Abbrev [1] 0xc:0x22 DW_TAG_compile_unit
+	.byte	0                               # DW_AT_producer
+	.short	12                              # DW_AT_language
+	.byte	1                               # DW_AT_name
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.byte	2                               # DW_AT_comp_dir
+	.long	.Laddr_table_base0              # DW_AT_addr_base
+	.byte	2                               # Abbrev [2] 0x1e:0xb DW_TAG_variable
+	.byte	3                               # DW_AT_name
+	.long	41                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	1                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_location
+	.byte	161
+	.byte	0
+	.byte	3                               # Abbrev [3] 0x29:0x4 DW_TAG_base_type
+	.byte	4                               # DW_AT_name
+	.byte	6                               # DW_AT_encoding
+	.byte	1                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str_offsets,"",@progbits
+	.long	24                              # Length of String Offsets Set
+	.short	5
+	.short	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"Debian clang version 14.0.6"   # string offset=0
+.Linfo_string1:
+	.asciz	"/tmp/tmp.c"                    # string offset=28
+.Linfo_string2:
+	.asciz	"/tmp"                          # string offset=39
+.Linfo_string3:
+	.asciz	"defined_data"                  # string offset=44
+.Linfo_string4:
+	.asciz	"char"                          # string offset=57
+	.section	.debug_str_offsets,"",@progbits
+	.long	.Linfo_string0
+	.long	.Linfo_string1
+	.long	.Linfo_string2
+	.long	.Linfo_string3
+	.long	.Linfo_string4
+	.section	.debug_addr,"",@progbits
+	.long	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+.Laddr_table_base0:
+	.quad	defined_data
+.Ldebug_addr_end0:
+	.ident	"Debian clang version 14.0.6"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:

diff  --git a/llvm/tools/llvm-nm/CMakeLists.txt b/llvm/tools/llvm-nm/CMakeLists.txt
index ec04f1e9d2343e4..5191e138d1c0bbb 100644
--- a/llvm/tools/llvm-nm/CMakeLists.txt
+++ b/llvm/tools/llvm-nm/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
   Object
   Option
   Support
+  Symbolize
   TargetParser
   TextAPI
   )

diff  --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td
index 60ac134269b3bfb..04d9f5db5cf85d7 100644
--- a/llvm/tools/llvm-nm/Opts.td
+++ b/llvm/tools/llvm-nm/Opts.td
@@ -22,6 +22,7 @@ def export_symbols : FF<"export-symbols", "Export symbol list for all inputs">;
 def extern_only : FF<"extern-only", "Show only external symbols">;
 defm format : Eq<"format", "Specify output format: bsd (default), posix, sysv, darwin, just-symbols">, MetaVarName<"<format>">;
 def help : FF<"help", "Display this help">;
+def line_numbers : FF<"line-numbers", "Use debugging information to print symbols' filenames and line numbers">;
 def no_llvm_bc : FF<"no-llvm-bc", "Disable LLVM bitcode reader">;
 def no_sort : FF<"no-sort", "Show symbols in order encountered">;
 def no_weak : FF<"no-weak", "Show only non-weak symbols">;
@@ -67,6 +68,7 @@ def : JoinedOrSeparate<["-"], "f">, HelpText<"Alias for --format">, Alias<format
 def : F<"h", "Alias for --help">, Alias<help>;
 def : F<"g", "Alias for --extern-only">, Alias<extern_only>;
 def : F<"j", "Alias for --format=just-symbols">, Alias<format_EQ>, AliasArgs<["just-symbols"]>;
+def : F<"l", "Alias for --line-numbers">, Alias<line_numbers>;
 def : F<"m", "Alias for --format=darwin">, Alias<format_EQ>, AliasArgs<["darwin"]>;
 def : F<"M", "Deprecated alias for --print-armap">, Alias<print_armap>, Flags<[HelpHidden]>;
 def : F<"n", "Alias for --numeric-sort">, Alias<numeric_sort>;

diff  --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 9a9e8bd146bb659..051fa3e5bfa5a98 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -19,6 +19,7 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -97,6 +98,7 @@ static bool Demangle;
 static bool DynamicSyms;
 static bool ExportSymbols;
 static bool ExternalOnly;
+static bool LineNumbers;
 static OutputFormatTy OutputFormat;
 static bool NoLLVMBitcode;
 static bool NoSort;
@@ -551,8 +553,6 @@ static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
       }
     }
   }
-
-  outs() << "\n";
 }
 
 // Table that maps Darwin's Mach-O stab constants to strings to allow printing.
@@ -689,9 +689,88 @@ static void printExportSymbolList(const std::vector<NMSymbol> &SymbolList) {
   }
 }
 
+// Appends "\t<file>:<line>" to outs() for symbol S when debugging information
+// yields a source location, and prints nothing otherwise. No trailing newline
+// is written here; the caller terminates the output line.
+//
+// How the location is looked up depends on S.TypeChar:
+//   'U'       - the first relocation against the symbol, within a section
+//               that relocates a text section, that has line information.
+//   't', 'T'  - the symbol's own address, symbolized as code.
+//   otherwise - the symbol's own address, symbolized as data.
+//
+// Symbols whose owning file is not an ObjectFile are skipped. Symbolizer
+// failures are reported through error() using the object's file name.
+static void printLineNumbers(symbolize::LLVMSymbolizer &Symbolizer,
+                             const NMSymbol &S) {
+  const auto *Obj = dyn_cast<ObjectFile>(S.Sym.getObject());
+  if (!Obj)
+    return;
+  const SymbolRef Sym(S.Sym);
+  // Symbols without a section (e.g. undefined ones) keep UndefSection.
+  uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+  section_iterator Sec = cantFail(Sym.getSection());
+  if (Sec != Obj->section_end())
+    SectionIndex = Sec->getIndex();
+  object::SectionedAddress Address = {cantFail(Sym.getAddress()), SectionIndex};
+
+  std::string FileName;
+  // Always overwritten before it is printed; initialized defensively.
+  uint32_t Line = 0;
+  switch (S.TypeChar) {
+  // For undefined symbols, find the first relocation for that symbol with a
+  // line number.
+  case 'U': {
+    for (const SectionRef RelocsSec : Obj->sections()) {
+      if (RelocsSec.relocations().empty())
+        continue;
+      // Only relocations that apply to text sections are considered.
+      SectionRef TextSec = *cantFail(RelocsSec.getRelocatedSection());
+      if (!TextSec.isText())
+        continue;
+      for (const RelocationRef R : RelocsSec.relocations()) {
+        if (R.getSymbol() != Sym)
+          continue;
+        // The relocated location lives in TextSec, so qualify the address with
+        // that section's index. The symbol's own SectionIndex is UndefSection
+        // here and would leave the section to be guessed from the address.
+        Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
+            *Obj, {TextSec.getAddress() + R.getOffset(), TextSec.getIndex()});
+        if (!ResOrErr) {
+          error(ResOrErr.takeError(), Obj->getFileName());
+          return;
+        }
+        // This relocation has no line info; a later one may still have some.
+        if (ResOrErr->FileName == DILineInfo::BadString)
+          continue;
+        FileName = std::move(ResOrErr->FileName);
+        Line = ResOrErr->Line;
+        break;
+      }
+      // Stop scanning sections once a location has been found.
+      if (!FileName.empty())
+        break;
+    }
+    if (FileName.empty())
+      return;
+    break;
+  }
+  // Text symbols are symbolized as code at their own address.
+  case 't':
+  case 'T': {
+    Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    if (ResOrErr->FileName == DILineInfo::BadString)
+      return;
+    FileName = std::move(ResOrErr->FileName);
+    Line = ResOrErr->Line;
+    break;
+  }
+  // Every other symbol type is symbolized as data; an empty DeclFile means
+  // there is nothing to print.
+  default: {
+    Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    if (ResOrErr->DeclFile.empty())
+      return;
+    FileName = std::move(ResOrErr->DeclFile);
+    Line = ResOrErr->DeclLine;
+    break;
+  }
+  }
+  outs() << '\t' << FileName << ':' << Line;
+}
+
 static void printSymbolList(SymbolicFile &Obj,
                             std::vector<NMSymbol> &SymbolList, bool printName,
                             StringRef ArchiveName, StringRef ArchitectureName) {
+  std::optional<symbolize::LLVMSymbolizer> Symbolizer;
+  if (LineNumbers)
+    Symbolizer.emplace();
+
   if (!PrintFileName) {
     if ((OutputFormat == bsd || OutputFormat == posix ||
          OutputFormat == just_symbols) &&
@@ -798,7 +877,7 @@ static void printSymbolList(SymbolicFile &Obj,
                         printFormat);
     } else if (OutputFormat == posix) {
       outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
-             << (MachO ? "0" : SymbolSizeStr) << "\n";
+             << (MachO ? "0" : SymbolSizeStr);
     } else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
       if (PrintAddress)
         outs() << SymbolAddrStr << ' ';
@@ -819,12 +898,14 @@ static void printSymbolList(SymbolicFile &Obj,
         } else
           outs() << S.IndirectName << ")";
       }
-      outs() << "\n";
     } else if (OutputFormat == sysv) {
       outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "|   "
              << S.TypeChar << "  |" << right_justify(S.TypeName, 18) << "|"
-             << SymbolSizeStr << "|     |" << S.SectionName << "\n";
+             << SymbolSizeStr << "|     |" << S.SectionName;
     }
+    if (LineNumbers)
+      printLineNumbers(*Symbolizer, S);
+    outs() << '\n';
   }
 
   SymbolList.clear();
@@ -2415,6 +2496,7 @@ int llvm_nm_main(int argc, char **argv, const llvm::ToolContext &) {
   else
     error("--format value should be one of: bsd, posix, sysv, darwin, "
           "just-symbols");
+  LineNumbers = Args.hasArg(OPT_line_numbers);
   NoLLVMBitcode = Args.hasArg(OPT_no_llvm_bc);
   NoSort = Args.hasArg(OPT_no_sort);
   NoWeakSymbols = Args.hasArg(OPT_no_weak);


        


More information about the llvm-commits mailing list