[llvm] ea99225 - [Symbolizer] Handle {{{bt}}} symbolizer markup element.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 31 09:49:40 PDT 2022


Author: Daniel Thornburgh
Date: 2022-08-31T09:49:32-07:00
New Revision: ea99225521cba6dec1ad4ca70a8665829e772fa9

URL: https://github.com/llvm/llvm-project/commit/ea99225521cba6dec1ad4ca70a8665829e772fa9
DIFF: https://github.com/llvm/llvm-project/commit/ea99225521cba6dec1ad4ca70a8665829e772fa9.diff

LOG: [Symbolizer] Handle {{{bt}}} symbolizer markup element.

This adds support for backtrace generation to the llvm-symbolizer markup
filter, which is likely the largest use case.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D132706

Added: 
    llvm/test/DebugInfo/symbolize-filter-markup-bt.test

Modified: 
    llvm/docs/CommandGuide/llvm-symbolizer.rst
    llvm/docs/SymbolizerMarkupFormat.rst
    llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
    llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 5870e0e9e95e6..abb174c7579ee 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -254,9 +254,21 @@ OPTIONS
   and prints the results to standard output. The following markup elements are
   not yet supported:
 
-  * ``{{bt}}``
-  * ``{{hexdict}}``
-  * ``{{dumpfile}}``
+  * ``{{{hexdict}}}``
+  * ``{{{dumpfile}}}``
+
+  The ``{{{bt}}}`` backtrace element reports frames using the following syntax:
+
+  ``#<number>[.<inline>] <address> <function> <file>:<line>:<col> (<module>+<relative address>)``
+
+  ``<inline>`` provides frame numbers for calls inlined into the caller
+  corresponding to ``<number>``. The inlined call numbers start at 1 and increase
+  from callee to caller.
+
+  ``<address>`` is an address inside the call instruction to the function.  The
+  address may not be the start of the instruction.  ``<relative address>`` is
+  the corresponding virtual offset in the ``<module>`` loaded at that address.
+
 
 .. _llvm-symbolizer-opt-f:
 

diff  --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index b06cc20f41ef4..169e57a3aa8a9 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -205,7 +205,7 @@ human-readable symbolic form.
     {{{data:0x12345678}}}
     {{{data:0xffffffff9abcdef0}}}
 
-``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}`` [#not_yet_implemented]_
+``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}``
 
   This represents one frame in a backtrace. It usually appears on a line by
   itself (surrounded only by whitespace), in a sequence of such lines with

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index b597e9ba7ba25..a54f8f5d2db81 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -97,6 +97,7 @@ class MarkupFilter {
   bool tryPresentation(const MarkupNode &Node);
   bool trySymbol(const MarkupNode &Node);
   bool tryPC(const MarkupNode &Node);
+  bool tryBackTrace(const MarkupNode &Node);
   bool tryData(const MarkupNode &Node);
 
   bool trySGR(const MarkupNode &Node);
@@ -118,6 +119,7 @@ class MarkupFilter {
   Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
   Optional<std::string> parseMode(StringRef Str) const;
   Optional<PCType> parsePCType(StringRef Str) const;
+  Optional<uint64_t> parseFrameNumber(StringRef Str) const;
 
   bool checkTag(const MarkupNode &Node) const;
   bool checkNumFields(const MarkupNode &Element, size_t Size) const;

diff  --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 70fb49d74b3a1..d96c0c85d5bd1 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
@@ -216,6 +217,8 @@ bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
     return true;
   if (tryPC(Node))
     return true;
+  if (tryBackTrace(Node))
+    return true;
   return tryData(Node);
 }
 
@@ -269,8 +272,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
     printRawElement(Node);
     return true;
   }
-  if (LI->FileName == DILineInfo::BadString &&
-      LI->FunctionName == DILineInfo::BadString && LI->Line == 0) {
+  if (!*LI) {
     printRawElement(Node);
     return true;
   }
@@ -286,6 +288,87 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
   return true;
 }
 
+// Handle a {{{bt:%u:%p[:ra|:pc]}}} element by printing one line per frame
+// symbolized at the address. Returns false only if Node is not a "bt" element.
+bool MarkupFilter::tryBackTrace(const MarkupNode &Node) {
+  if (Node.Tag != "bt")
+    return false;
+  if (!checkNumFieldsAtLeast(Node, 2) || !checkNumFieldsAtMost(Node, 3))
+    return true;
+
+  Optional<uint64_t> FrameNumber = parseFrameNumber(Node.Fields[0]);
+  if (!FrameNumber)
+    return true;
+
+  Optional<uint64_t> Addr = parseAddr(Node.Fields[1]);
+  if (!Addr)
+    return true;
+
+  // Backtrace addresses are assumed to be return addresses by default.
+  PCType Type = PCType::ReturnAddress;
+  if (Node.Fields.size() == 3) {
+    Optional<PCType> ParsedType = parsePCType(Node.Fields[2]);
+    if (!ParsedType)
+      return true;
+    Type = *ParsedType;
+  }
+  *Addr = adjustAddr(*Addr, Type);
+
+  const MMap *MMap = getContainingMMap(*Addr);
+  if (!MMap) {
+    WithColor::error() << "no mmap covers address\n";
+    reportLocation(Node.Fields[0].begin());
+    printRawElement(Node);
+    return true;
+  }
+  uint64_t MRA = MMap->getModuleRelativeAddr(*Addr);
+
+  Expected<DIInliningInfo> II =
+      Symbolizer.symbolizeInlinedCode(MMap->Mod->BuildID, {MRA});
+  if (!II) {
+    WithColor::defaultErrorHandler(II.takeError());
+    printRawElement(Node);
+    return true;
+  }
+
+  // The header is loop-invariant. Don't highlight the # sign as a value.
+  auto Header = formatv("{0, +6}", formatv("#{0}", *FrameNumber)).sstr<16>();
+  size_t NumberIdx = Header.find('#') + 1;
+  highlight();
+  for (unsigned I = 0, E = II->getNumberOfFrames(); I != E; ++I) {
+    OS << Header.substr(0, NumberIdx);
+    printValue(Header.substr(NumberIdx));
+    if (I == E - 1) {
+      OS << "   ";
+    } else {
+      OS << '.';
+      printValue(formatv("{0, -2}", I + 1));
+    }
+    printValue(formatv(" {0:x16} ", *Addr));
+
+    DILineInfo LI = II->getFrame(I);
+    if (LI) {
+      printValue(LI.FunctionName);
+      OS << ' ';
+      printValue(LI.FileName);
+      OS << ':';
+      printValue(Twine(LI.Line));
+      OS << ':';
+      printValue(Twine(LI.Column));
+      OS << ' ';
+    }
+    OS << '(';
+    printValue(MMap->Mod->Name);
+    OS << '+';
+    printValue(formatv("{0:x}", MRA));
+    OS << ')';
+    if (I != E - 1)
+      OS << lineEnding();
+  }
+  restoreColor();
+  return true;
+}
+
 bool MarkupFilter::tryData(const MarkupNode &Node) {
   if (Node.Tag != "data")
     return false;
@@ -502,6 +585,16 @@ Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const {
   return ID;
 }
 
+// Parse a frame number (%u in the spec).
+Optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
+  uint64_t ID;
+  if (Str.getAsInteger(10, ID)) {
+    reportTypeError(Str, "frame number");
+    return None;
+  }
+  return ID;
+}
+
 // Parse a build ID (%x in the spec).
 Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const {
   std::string Bytes;

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-bt.test b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test
new file mode 100644
index 0000000000000..a1701135f2d9a
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test
@@ -0,0 +1,213 @@
+REQUIRES: x86-registered-target
+RUN: split-file %s %t
+RUN: mkdir -p %t/.build-id/ab
+RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \
+RUN:   -o %t/.build-id/ab/cdef.debug
+RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \
+RUN:   > %t.output 2> %t.err
+RUN: FileCheck %s --input-file=%t.output --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x10-0x10f](r)[[END:\]{3}]]
+CHECK:    #0.1  0x0000000000000018 second /tmp[[SEP:[/\\]]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #1.1  0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #1    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #0.1  0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK:    #0    0x0000000000000019 first /tmp[[SEP]]tmp.c:5:1 (a.o+0x9)
+CHECK:    #0    0x00000000000000fe (a.o+0xee)
+CHECK: [[BEGIN]]bt:0:0x111[[END]]
+
+ERR: error: expected at least 2 field(s); found 0
+ERR: error: no mmap covers address
+ERR: error: expected PC type; found ''
+ERR: error: expected at most 3 field(s); found 4
+
+;--- input
+{{{module:0:a.o:elf:abcdef}}}
+{{{mmap:0x10:256:load:0:r:0}}}
+{{{bt:0:0x19}}}
+{{{bt:1:0x19}}}
+{{{bt:0:0x19:ra}}}
+{{{bt:0:0x19:pc}}}
+{{{bt:0:0xff}}}
+
+{{{bt}}}
+{{{bt:0:0x111}}}
+{{{bt:0:0:}}}
+{{{bt:0:0:pc:}}}
+;--- asm.s
+# Generated by running "clang -finline -g -S tmp.c" in the following tmp.c on
+# Linux x86_64:
+#
+# static void second(void);
+# void first(void) {
+#   second();
+# }
+# void second(void) {}
+	.text
+	.file	"tmp.c"
+	.globl	first                           # -- Begin function first
+	.p2align	4, 0x90
+	.type	first,@function
+first:                                  # @first
+.Lfunc_begin0:
+	.file	1 "/tmp" "tmp.c"
+	.loc	1 3 0                           # tmp.c:3:0
+	.cfi_startproc
+# %bb.0:
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+.Ltmp0:
+	.loc	1 8 3 prologue_end              # tmp.c:8:3
+	callq	first
+.Ltmp1:
+	.loc	1 5 1                           # tmp.c:5:1
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Ltmp2:
+.Lfunc_end0:
+	.size	first, .Lfunc_end0-first
+	.cfi_endproc
+                                        # -- End function
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	14                              # DW_FORM_strp
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	14                              # DW_FORM_strp
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	39                              # DW_AT_prototyped
+	.byte	25                              # DW_FORM_flag_present
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	32                              # DW_AT_inline
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	39                              # DW_AT_prototyped
+	.byte	25                              # DW_FORM_flag_present
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	29                              # DW_TAG_inlined_subroutine
+	.byte	0                               # DW_CHILDREN_no
+	.byte	49                              # DW_AT_abstract_origin
+	.byte	19                              # DW_FORM_ref4
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	88                              # DW_AT_call_file
+	.byte	11                              # DW_FORM_data1
+	.byte	89                              # DW_AT_call_line
+	.byte	11                              # DW_FORM_data1
+	.byte	87                              # DW_AT_call_column
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	4                               # DWARF version number
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.byte	1                               # Abbrev [1] 0xb:0x52 DW_TAG_compile_unit
+	.long	.Linfo_string0                  # DW_AT_producer
+	.short	12                              # DW_AT_language
+	.long	.Linfo_string1                  # DW_AT_name
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Linfo_string2                  # DW_AT_comp_dir
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	2                               # Abbrev [2] 0x2a:0x8 DW_TAG_subprogram
+	.long	.Linfo_string3                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	7                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+	.byte	1                               # DW_AT_inline
+	.byte	3                               # Abbrev [3] 0x32:0x2a DW_TAG_subprogram
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.long	.Linfo_string4                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+	.byte	4                               # Abbrev [4] 0x47:0x14 DW_TAG_inlined_subroutine
+	.long	42                              # DW_AT_abstract_origin
+	.quad	.Ltmp0                          # DW_AT_low_pc
+	.long	.Ltmp1-.Ltmp0                   # DW_AT_high_pc
+	.byte	1                               # DW_AT_call_file
+	.byte	4                               # DW_AT_call_line
+	.byte	3                               # DW_AT_call_column
+	.byte	0                               # End Of Children Mark
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"Debian clang version 13.0.1-6" # string offset=0
+.Linfo_string1:
+	.asciz	"tmp.c"                         # string offset=30
+.Linfo_string2:
+	.asciz	"/tmp" # string offset=36
+.Linfo_string3:
+	.asciz	"second"                        # string offset=85
+.Linfo_string4:
+	.asciz	"first"                         # string offset=92
+	.ident	"Debian clang version 13.0.1-6"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.addrsig_sym first
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:


        


More information about the llvm-commits mailing list