[llvm-branch-commits] [llvm] 7ce1ec5 - [Symbolizer] Handle {{{bt}}} symbolizer markup element.
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Sep 6 23:38:29 PDT 2022
Author: Daniel Thornburgh
Date: 2022-09-07T08:37:30+02:00
New Revision: 7ce1ec5a2a22fec588bd5f87a50052a97f7dd846
URL: https://github.com/llvm/llvm-project/commit/7ce1ec5a2a22fec588bd5f87a50052a97f7dd846
DIFF: https://github.com/llvm/llvm-project/commit/7ce1ec5a2a22fec588bd5f87a50052a97f7dd846.diff
LOG: [Symbolizer] Handle {{{bt}}} symbolizer markup element.
This adds support for backtrace generation to the llvm-symbolizer markup
filter, which is likely the largest use case.
Reviewed By: peter.smith
Differential Revision: https://reviews.llvm.org/D132706
(cherry picked from commit ea99225521cba6dec1ad4ca70a8665829e772fa9)
Added:
llvm/test/DebugInfo/symbolize-filter-markup-bt.test
Modified:
llvm/docs/CommandGuide/llvm-symbolizer.rst
llvm/docs/SymbolizerMarkupFormat.rst
llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 5870e0e9e95e6..abb174c7579ee 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -254,9 +254,21 @@ OPTIONS
and prints the results to standard output. The following markup elements are
not yet supported:
- * ``{{bt}}``
- * ``{{hexdict}}``
- * ``{{dumpfile}}``
+ * ``{{{hexdict}}}``
+ * ``{{{dumpfile}}}``
+
+ The ``{{{bt}}}`` backtrace element reports frames using the following syntax:
+
+ ``#<number>[.<inline>] <address> <function> <file>:<line>:<col> (<module>+<relative address>)``
+
+ ``<inline>`` provides frame numbers for calls inlined into the caller
+ corresponding to ``<number>``. The inlined call numbers start at 1 and increase
+ from callee to caller.
+
+ ``<address>`` is an address inside the call instruction to the function. The
+ address may not be the start of the instruction. ``<relative address>`` is
+ the corresponding virtual offset in the ``<module>`` loaded at that address.
+
.. _llvm-symbolizer-opt-f:
diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index b06cc20f41ef4..169e57a3aa8a9 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -205,7 +205,7 @@ human-readable symbolic form.
{{{data:0x12345678}}}
{{{data:0xffffffff9abcdef0}}}
-``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}`` [#not_yet_implemented]_
+``{{{bt:%u:%p}}}``, ``{{{bt:%u:%p:ra}}}``, ``{{{bt:%u:%p:pc}}}``
This represents one frame in a backtrace. It usually appears on a line by
itself (surrounded only by whitespace), in a sequence of such lines with
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index b597e9ba7ba25..a54f8f5d2db81 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -97,6 +97,7 @@ class MarkupFilter {
bool tryPresentation(const MarkupNode &Node);
bool trySymbol(const MarkupNode &Node);
bool tryPC(const MarkupNode &Node);
+ bool tryBackTrace(const MarkupNode &Node);
bool tryData(const MarkupNode &Node);
bool trySGR(const MarkupNode &Node);
@@ -118,6 +119,7 @@ class MarkupFilter {
Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
Optional<std::string> parseMode(StringRef Str) const;
Optional<PCType> parsePCType(StringRef Str) const;
+ Optional<uint64_t> parseFrameNumber(StringRef Str) const;
bool checkTag(const MarkupNode &Node) const;
bool checkNumFields(const MarkupNode &Element, size_t Size) const;
diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 70fb49d74b3a1..d96c0c85d5bd1 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -27,6 +27,7 @@
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -216,6 +217,8 @@ bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
return true;
if (tryPC(Node))
return true;
+ if (tryBackTrace(Node))
+ return true;
return tryData(Node);
}
@@ -269,8 +272,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
printRawElement(Node);
return true;
}
- if (LI->FileName == DILineInfo::BadString &&
- LI->FunctionName == DILineInfo::BadString && LI->Line == 0) {
+ if (!*LI) {
printRawElement(Node);
return true;
}
@@ -286,6 +288,87 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
return true;
}
+bool MarkupFilter::tryBackTrace(const MarkupNode &Node) {
+ if (Node.Tag != "bt")
+ return false;
+ if (!checkNumFieldsAtLeast(Node, 2))
+ return true;
+ if (!checkNumFieldsAtMost(Node, 3))
+ return true;
+
+ Optional<uint64_t> FrameNumber = parseFrameNumber(Node.Fields[0]);
+ if (!FrameNumber)
+ return true;
+
+ Optional<uint64_t> Addr = parseAddr(Node.Fields[1]);
+ if (!Addr)
+ return true;
+
+ // Backtrace addresses are assumed to be return addresses by default.
+ PCType Type = PCType::ReturnAddress;
+ if (Node.Fields.size() == 3) {
+ Optional<PCType> ParsedType = parsePCType(Node.Fields[2]);
+ if (!ParsedType)
+ return true;
+ Type = *ParsedType;
+ }
+ *Addr = adjustAddr(*Addr, Type);
+
+ const MMap *MMap = getContainingMMap(*Addr);
+ if (!MMap) {
+ WithColor::error() << "no mmap covers address\n";
+ reportLocation(Node.Fields[0].begin());
+ printRawElement(Node);
+ return true;
+ }
+ uint64_t MRA = MMap->getModuleRelativeAddr(*Addr);
+
+ Expected<DIInliningInfo> II =
+ Symbolizer.symbolizeInlinedCode(MMap->Mod->BuildID, {MRA});
+ if (!II) {
+ WithColor::defaultErrorHandler(II.takeError());
+ printRawElement(Node);
+ return true;
+ }
+
+ highlight();
+ for (unsigned I = 0, E = II->getNumberOfFrames(); I != E; ++I) {
+ auto Header = formatv("{0, +6}", formatv("#{0}", FrameNumber)).sstr<16>();
+ // Don't highlight the # sign as a value.
+ size_t NumberIdx = Header.find("#") + 1;
+ OS << Header.substr(0, NumberIdx);
+ printValue(Header.substr(NumberIdx));
+ if (I == E - 1) {
+ OS << " ";
+ } else {
+ OS << '.';
+ printValue(formatv("{0, -2}", I + 1));
+ }
+ printValue(formatv(" {0:x16} ", *Addr));
+
+ DILineInfo LI = II->getFrame(I);
+ if (LI) {
+ printValue(LI.FunctionName);
+ OS << ' ';
+ printValue(LI.FileName);
+ OS << ':';
+ printValue(Twine(LI.Line));
+ OS << ':';
+ printValue(Twine(LI.Column));
+ OS << ' ';
+ }
+ OS << '(';
+ printValue(MMap->Mod->Name);
+ OS << "+";
+ printValue(formatv("{0:x}", MRA));
+ OS << ')';
+ if (I != E - 1)
+ OS << lineEnding();
+ }
+ restoreColor();
+ return true;
+}
+
bool MarkupFilter::tryData(const MarkupNode &Node) {
if (Node.Tag != "data")
return false;
@@ -502,6 +585,16 @@ Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const {
return ID;
}
+// Parse a frame number (%u in the spec).
+Optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
+ uint64_t ID;
+ if (Str.getAsInteger(10, ID)) {
+ reportTypeError(Str, "frame number");
+ return None;
+ }
+ return ID;
+}
+
// Parse a build ID (%x in the spec).
Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const {
std::string Bytes;
diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-bt.test b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test
new file mode 100644
index 0000000000000..a1701135f2d9a
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-bt.test
@@ -0,0 +1,213 @@
+REQUIRES: x86-registered-target
+RUN: split-file %s %t
+RUN: mkdir -p %t/.build-id/ab
+RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \
+RUN: -o %t/.build-id/ab/cdef.debug
+RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \
+RUN: > %t.output 2> %t.err
+RUN: FileCheck %s --input-file=%t.output --match-full-lines \
+RUN: --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x10-0x10f](r)[[END:\]{3}]]
+CHECK: #0.1 0x0000000000000018 second /tmp[[SEP:[/\\]]]tmp.c:8:3 (a.o+0x8)
+CHECK: #0 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK: #1.1 0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK: #1 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK: #0.1 0x0000000000000018 second /tmp[[SEP]]tmp.c:8:3 (a.o+0x8)
+CHECK: #0 0x0000000000000018 first /tmp[[SEP]]tmp.c:4:3 (a.o+0x8)
+CHECK: #0 0x0000000000000019 first /tmp[[SEP]]tmp.c:5:1 (a.o+0x9)
+CHECK: #0 0x00000000000000fe (a.o+0xee)
+CHECK: [[BEGIN]]bt:0:0x111[[END]]
+
+ERR: error: expected at least 2 field(s); found 0
+ERR: error: no mmap covers address
+ERR: error: expected PC type; found ''
+ERR: error: expected at most 3 field(s); found 4
+
+;--- input
+{{{module:0:a.o:elf:abcdef}}}
+{{{mmap:0x10:256:load:0:r:0}}}
+{{{bt:0:0x19}}}
+{{{bt:1:0x19}}}
+{{{bt:0:0x19:ra}}}
+{{{bt:0:0x19:pc}}}
+{{{bt:0:0xff}}}
+
+{{{bt}}}
+{{{bt:0:0x111}}}
+{{{bt:0:0:}}}
+{{{bt:0:0:pc:}}}
+;--- asm.s
+# Generated by running "clang -finline -g -S tmp.c" in the following tmp.c on
+# Linux x86_64:
+#
+# static void second(void);
+# void first(void) {
+# second();
+# }
+# void second(void) {}
+ .text
+ .file "tmp.c"
+ .globl first # -- Begin function first
+ .p2align 4, 0x90
+ .type first,@function
+first: # @first
+.Lfunc_begin0:
+ .file 1 "/tmp" "tmp.c"
+ .loc 1 3 0 # tmp.c:3:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp0:
+ .loc 1 8 3 prologue_end # tmp.c:8:3
+ callq first
+.Ltmp1:
+ .loc 1 5 1 # tmp.c:5:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp2:
+.Lfunc_end0:
+ .size first, .Lfunc_end0-first
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 14 # DW_FORM_strp
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 39 # DW_AT_prototyped
+ .byte 25 # DW_FORM_flag_present
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 32 # DW_AT_inline
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 39 # DW_AT_prototyped
+ .byte 25 # DW_FORM_flag_present
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 29 # DW_TAG_inlined_subroutine
+ .byte 0 # DW_CHILDREN_no
+ .byte 49 # DW_AT_abstract_origin
+ .byte 19 # DW_FORM_ref4
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 88 # DW_AT_call_file
+ .byte 11 # DW_FORM_data1
+ .byte 89 # DW_AT_call_line
+ .byte 11 # DW_FORM_data1
+ .byte 87 # DW_AT_call_column
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x52 DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .long .Linfo_string1 # DW_AT_name
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Linfo_string2 # DW_AT_comp_dir
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 2 # Abbrev [2] 0x2a:0x8 DW_TAG_subprogram
+ .long .Linfo_string3 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 7 # DW_AT_decl_line
+ # DW_AT_prototyped
+ # DW_AT_external
+ .byte 1 # DW_AT_inline
+ .byte 3 # Abbrev [3] 0x32:0x2a DW_TAG_subprogram
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string4 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ # DW_AT_prototyped
+ # DW_AT_external
+ .byte 4 # Abbrev [4] 0x47:0x14 DW_TAG_inlined_subroutine
+ .long 42 # DW_AT_abstract_origin
+ .quad .Ltmp0 # DW_AT_low_pc
+ .long .Ltmp1-.Ltmp0 # DW_AT_high_pc
+ .byte 1 # DW_AT_call_file
+ .byte 4 # DW_AT_call_line
+ .byte 3 # DW_AT_call_column
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "Debian clang version 13.0.1-6" # string offset=0
+.Linfo_string1:
+ .asciz "tmp.c" # string offset=30
+.Linfo_string2:
+ .asciz "/tmp" # string offset=36
+.Linfo_string3:
+ .asciz "second" # string offset=85
+.Linfo_string4:
+ .asciz "first" # string offset=92
+ .ident "Debian clang version 13.0.1-6"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .addrsig_sym first
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
More information about the llvm-branch-commits
mailing list