[llvm] bf48b12 - [Symbolizer] Implement pc element in symbolizing filter.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 8 11:08:52 PDT 2022


Author: Daniel Thornburgh
Date: 2022-08-08T11:08:48-07:00
New Revision: bf48b128b02813e53e0c8f6585db837d14c9358f

URL: https://github.com/llvm/llvm-project/commit/bf48b128b02813e53e0c8f6585db837d14c9358f
DIFF: https://github.com/llvm/llvm-project/commit/bf48b128b02813e53e0c8f6585db837d14c9358f.diff

LOG: [Symbolizer] Implement pc element in symbolizing filter.

Implements the pc element for the symbolizing filter, including its
"ra" and "pc" modes. Return addresses ("ra") are adjusted by
decrementing one. By default, {{{pc}}} elements are assumed to point to
precise code ("pc") locations. Backtrace elements will adopt the
opposite convention.

Along the way, some minor refactors of value printing and colorization.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D131115

Added: 
    llvm/test/DebugInfo/symbolize-filter-markup-pc.test

Modified: 
    llvm/docs/CommandGuide/llvm-symbolizer.rst
    llvm/docs/SymbolizerMarkupFormat.rst
    llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
    llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
    llvm/test/DebugInfo/symbolize-filter-markup-data.test

Removed: 
    


################################################################################
diff  --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 3fff88a7f1651..5870e0e9e95e6 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -254,7 +254,6 @@ OPTIONS
   and prints the results to standard output. The following markup elements are
   not yet supported:
 
-  * ``{{pc}}``
   * ``{{bt}}``
   * ``{{hexdict}}``
   * ``{{dumpfile}}``

diff  --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index 319a330219506..b06cc20f41ef4 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -184,7 +184,7 @@ human-readable symbolic form.
     {{{symbol:_ZN7Mangled4NameEv}}}
     {{{symbol:foobar}}}
 
-``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}`` [#not_yet_implemented]_
+``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}``
 
   Here ``%p`` is the memory address of a code location. It might be presented as a
   function name and source location. The second two forms distinguish the kind of

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index 3a2c2bf490411..b597e9ba7ba25 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -71,6 +71,15 @@ class MarkupFilter {
     SmallVector<const MMap *> MMaps = {};
   };
 
+  // The semantics of a possible program counter value.
+  enum class PCType {
+    // The address is a return address and must be adjusted to point to the call
+    // itself.
+    ReturnAddress,
+    // The address is the precise location in the code and needs no adjustment.
+    PreciseCode,
+  };
+
   bool tryContextualElement(const MarkupNode &Node,
                             const SmallVector<MarkupNode> &DeferredNodes);
   bool tryMMap(const MarkupNode &Element,
@@ -87,6 +96,7 @@ class MarkupFilter {
 
   bool tryPresentation(const MarkupNode &Node);
   bool trySymbol(const MarkupNode &Node);
+  bool tryPC(const MarkupNode &Node);
   bool tryData(const MarkupNode &Node);
 
   bool trySGR(const MarkupNode &Node);
@@ -96,6 +106,9 @@ class MarkupFilter {
   void restoreColor();
   void resetColor();
 
+  void printRawElement(const MarkupNode &Element);
+  void printValue(Twine Value);
+
   Optional<Module> parseModule(const MarkupNode &Element) const;
   Optional<MMap> parseMMap(const MarkupNode &Element) const;
 
@@ -104,10 +117,12 @@ class MarkupFilter {
   Optional<uint64_t> parseSize(StringRef Str) const;
   Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
   Optional<std::string> parseMode(StringRef Str) const;
+  Optional<PCType> parsePCType(StringRef Str) const;
 
   bool checkTag(const MarkupNode &Node) const;
   bool checkNumFields(const MarkupNode &Element, size_t Size) const;
   bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const;
+  bool checkNumFieldsAtMost(const MarkupNode &Element, size_t Size) const;
 
   void reportTypeError(StringRef Str, StringRef TypeName) const;
   void reportLocation(StringRef::iterator Loc) const;
@@ -115,6 +130,8 @@ class MarkupFilter {
   const MMap *getOverlappingMMap(const MMap &Map) const;
   const MMap *getContainingMMap(uint64_t Addr) const;
 
+  uint64_t adjustAddr(uint64_t Addr, PCType Type) const;
+
   StringRef lineEnding() const;
 
   raw_ostream &OS;

diff  --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 2bf2e17514e1d..70fb49d74b3a1 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/Symbolize/Markup.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/Debuginfod/Debuginfod.h"
@@ -163,18 +164,17 @@ bool MarkupFilter::tryModule(const MarkupNode &Node,
     filterNode(Node);
   beginModuleInfoLine(&Module);
   OS << "; BuildID=";
-  highlightValue();
-  OS << toHex(Module.BuildID, /*LowerCase=*/true);
-  highlight();
+  printValue(toHex(Module.BuildID, /*LowerCase=*/true));
   return true;
 }
 
 void MarkupFilter::beginModuleInfoLine(const Module *M) {
   highlight();
   OS << "[[[ELF module";
-  highlightValue();
-  OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name);
-  highlight();
+  printValue(formatv(" #{0:x} ", M->ID));
+  OS << '"';
+  printValue(M->Name);
+  OS << '"';
   MIL = ModuleInfoLine{M};
 }
 
@@ -186,13 +186,12 @@ void MarkupFilter::endAnyModuleInfoLine() {
   });
   for (const MMap *M : MIL->MMaps) {
     OS << (M == MIL->MMaps.front() ? ' ' : ',');
-    highlightValue();
-    OS << formatv("[{0:x}-{1:x}]", M->Addr, M->Addr + M->Size - 1);
-    highlight();
-    OS << '(';
-    highlightValue();
-    OS << M->Mode;
-    highlight();
+    OS << '[';
+    printValue(formatv("{0:x}", M->Addr));
+    OS << '-';
+    printValue(formatv("{0:x}", M->Addr + M->Size - 1));
+    OS << "](";
+    printValue(M->Mode);
     OS << ')';
   }
   OS << "]]]" << lineEnding();
@@ -215,6 +214,8 @@ void MarkupFilter::filterNode(const MarkupNode &Node) {
 bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
   if (trySymbol(Node))
     return true;
+  if (tryPC(Node))
+    return true;
   return tryData(Node);
 }
 
@@ -230,6 +231,61 @@ bool MarkupFilter::trySymbol(const MarkupNode &Node) {
   return true;
 }
 
+bool MarkupFilter::tryPC(const MarkupNode &Node) {
+  if (Node.Tag != "pc")
+    return false;
+  if (!checkNumFieldsAtLeast(Node, 1))
+    return true;
+  if (!checkNumFieldsAtMost(Node, 2))
+    return true;
+
+  Optional<uint64_t> Addr = parseAddr(Node.Fields[0]);
+  if (!Addr)
+    return true;
+
+  // PC addresses that aren't part of a backtrace are assumed to be precise code
+  // locations.
+  PCType Type = PCType::PreciseCode;
+  if (Node.Fields.size() == 2) {
+    Optional<PCType> ParsedType = parsePCType(Node.Fields[1]);
+    if (!ParsedType)
+      return true;
+    Type = *ParsedType;
+  }
+  *Addr = adjustAddr(*Addr, Type);
+
+  const MMap *MMap = getContainingMMap(*Addr);
+  if (!MMap) {
+    WithColor::error() << "no mmap covers address\n";
+    reportLocation(Node.Fields[0].begin());
+    printRawElement(Node);
+    return true;
+  }
+
+  Expected<DILineInfo> LI = Symbolizer.symbolizeCode(
+      MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)});
+  if (!LI) {
+    WithColor::defaultErrorHandler(LI.takeError());
+    printRawElement(Node);
+    return true;
+  }
+  if (LI->FileName == DILineInfo::BadString &&
+      LI->FunctionName == DILineInfo::BadString && LI->Line == 0) {
+    printRawElement(Node);
+    return true;
+  }
+
+  highlight();
+  printValue(LI->FunctionName);
+  OS << '[';
+  printValue(LI->FileName);
+  OS << ':';
+  printValue(Twine(LI->Line));
+  OS << ']';
+  restoreColor();
+  return true;
+}
+
 bool MarkupFilter::tryData(const MarkupNode &Node) {
   if (Node.Tag != "data")
     return false;
@@ -239,21 +295,11 @@ bool MarkupFilter::tryData(const MarkupNode &Node) {
   if (!Addr)
     return true;
 
-  const auto PrintRaw = [&]() {
-    highlight();
-    OS << "[[[data:";
-    highlightValue();
-    OS << "0x" << toHex(*Addr, /*LowerCase=*/true);
-    highlight();
-    OS << "]]]\n";
-    restoreColor();
-  };
-
   const MMap *MMap = getContainingMMap(*Addr);
   if (!MMap) {
     WithColor::error() << "no mmap covers address\n";
     reportLocation(Node.Fields[0].begin());
-    PrintRaw();
+    printRawElement(Node);
     return true;
   }
 
@@ -261,7 +307,7 @@ bool MarkupFilter::tryData(const MarkupNode &Node) {
       MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)});
   if (!Symbol) {
     WithColor::defaultErrorHandler(Symbol.takeError());
-    PrintRaw();
+    printRawElement(Node);
     return true;
   }
 
@@ -343,6 +389,24 @@ void MarkupFilter::resetColor() {
     OS.resetColor();
 }
 
+void MarkupFilter::printRawElement(const MarkupNode &Element) {
+  highlight();
+  OS << "[[[";
+  printValue(Element.Tag);
+  for (StringRef Field : Element.Fields) {
+    OS << ':';
+    printValue(Field);
+  }
+  OS << "]]]";
+  restoreColor();
+}
+
+void MarkupFilter::printValue(Twine Value) {
+  highlightValue();
+  OS << Value;
+  highlight();
+}
+
 // This macro helps reduce the amount of indirection done through Optional
 // below, since the usual case upon returning a None Optional is to return None.
 #define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR)                                \
@@ -476,6 +540,17 @@ Optional<std::string> MarkupFilter::parseMode(StringRef Str) const {
   return Str.lower();
 }
 
+Optional<MarkupFilter::PCType> MarkupFilter::parsePCType(StringRef Str) const {
+  Optional<MarkupFilter::PCType> Type =
+      StringSwitch<Optional<MarkupFilter::PCType>>(Str)
+          .Case("ra", MarkupFilter::PCType::ReturnAddress)
+          .Case("pc", MarkupFilter::PCType::PreciseCode)
+          .Default(None);
+  if (!Type)
+    reportTypeError(Str, "PC type");
+  return Type;
+}
+
 bool MarkupFilter::checkTag(const MarkupNode &Node) const {
   if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) {
     WithColor::error(errs()) << "tags must be all lowercase characters\n";
@@ -508,6 +583,18 @@ bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element,
   return true;
 }
 
+bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element,
+                                        size_t Size) const {
+  if (Element.Fields.size() > Size) {
+    WithColor::error(errs())
+        << "expected at most " << Size << " field(s); found "
+        << Element.Fields.size() << "\n";
+    reportLocation(Element.Tag.end());
+    return false;
+  }
+  return true;
+}
+
 void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
   WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str
                            << "'\n";
@@ -556,6 +643,14 @@ const MarkupFilter::MMap *MarkupFilter::getContainingMMap(uint64_t Addr) const {
   return I->second.contains(Addr) ? &I->second : nullptr;
 }
 
+uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const {
+  // Decrementing return addresses by one moves them into the call instruction.
+  // The address doesn't have to be the start of the call instruction, just some
+  // byte on the inside. Subtracting one avoids needing detailed instruction
+  // length information here.
+  return Type == MarkupFilter::PCType::ReturnAddress ? Addr - 1 : Addr;
+}
+
 StringRef MarkupFilter::lineEnding() const {
   return Line.endswith("\r\n") ? "\r\n" : "\n";
 }

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-data.test b/llvm/test/DebugInfo/symbolize-filter-markup-data.test
index 3ce1baea4d6cd..ed7066c7ca780 100644
--- a/llvm/test/DebugInfo/symbolize-filter-markup-data.test
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-data.test
@@ -12,7 +12,7 @@ RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
 CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0x4](r),[0x10-0x11](r)[[END:\]{3}]]
 CHECK: long long byte
 CHECK: long byte
-CHECK: [[BEGIN]]data:0x05[[END]]
+CHECK: [[BEGIN]]data:0x5[[END]]
 
 ERR: error: expected 1 field(s); found 0
 ERR: error: no mmap covers address

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-pc.test b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test
new file mode 100644
index 0000000000000..5d77a5c724111
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test
@@ -0,0 +1,188 @@
+REQUIRES: x86-registered-target
+RUN: split-file %s %t
+RUN: mkdir -p %t/.build-id/ab
+RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \
+RUN:   -o %t/.build-id/ab/cdef.debug
+RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \
+RUN:   > %t.output 2> %t.err
+RUN: FileCheck %s --input-file=%t.output --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0xff](r)[[END:\]{3}]]
+CHECK: first[/dir/tmp.c:3]
+CHECK: first[/dir/tmp.c:5]
+CHECK: first[/dir/tmp.c:4]
+CHECK: first[/dir/tmp.c:5]
+CHECK: [[BEGIN]]pc:0xff[[END]]
+CHECK: [[BEGIN]]pc:0x100[[END]]
+
+ERR: error: expected at least 1 field(s); found 0
+ERR: error: no mmap covers address
+ERR: error: expected PC type; found ''
+ERR: error: expected at most 2 field(s); found 3
+
+;--- input
+{{{module:0:a.o:elf:abcdef}}}
+{{{mmap:0:256:load:0:r:0}}}
+{{{pc:0}}}
+{{{pc:0x9}}}
+{{{pc:0x9:ra}}}
+{{{pc:0x9:pc}}}
+{{{pc:0xff}}}
+
+{{{pc}}}
+{{{pc:0x100}}}
+{{{pc:0x9:}}}
+{{{pc:0x9:pc:}}}
+;--- asm.s
+	.text
+	.file	"tmp.c"
+	.globl	first                           # -- Begin function first
+	.p2align	4, 0x90
+	.type	first,@function
+first:                                  # @first
+.Lfunc_begin0:
+	.file	1 "/dir" "tmp.c"
+	.loc	1 3 0                           # tmp.c:3:0
+	.cfi_startproc
+# %bb.0:
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+.Ltmp0:
+	.loc	1 4 3 prologue_end              # tmp.c:4:3
+	callq	second
+	.loc	1 5 1                           # tmp.c:5:1
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Ltmp1:
+.Lfunc_end0:
+	.size	first, .Lfunc_end0-first
+	.cfi_endproc
+                                        # -- End function
+	.globl	second                          # -- Begin function second
+	.p2align	4, 0x90
+	.type	second, at function
+second:                                 # @second
+.Lfunc_begin1:
+	.loc	1 7 0                           # tmp.c:7:0
+	.cfi_startproc
+# %bb.0:
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+.Ltmp2:
+	.loc	1 8 3 prologue_end              # tmp.c:8:3
+	callq	first
+	.loc	1 9 1                           # tmp.c:9:1
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Ltmp3:
+.Lfunc_end1:
+	.size	second, .Lfunc_end1-second
+	.cfi_endproc
+                                        # -- End function
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	14                              # DW_FORM_strp
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	14                              # DW_FORM_strp
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	0                               # DW_CHILDREN_no
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	39                              # DW_AT_prototyped
+	.byte	25                              # DW_FORM_flag_present
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	4                               # DWARF version number
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.byte	1                               # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit
+	.long	.Linfo_string0                  # DW_AT_producer
+	.short	12                              # DW_AT_language
+	.long	.Linfo_string1                  # DW_AT_name
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Linfo_string2                  # DW_AT_comp_dir
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end1-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	2                               # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.long	.Linfo_string3                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+	.byte	2                               # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram
+	.quad	.Lfunc_begin1                   # DW_AT_low_pc
+	.long	.Lfunc_end1-.Lfunc_begin1       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.long	.Linfo_string4                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	7                               # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang" # string offset=0
+.Linfo_string1:
+	.asciz	"tmp.c"                         # string offset=30
+.Linfo_string2:
+	.asciz	"/dir" # string offset=36
+.Linfo_string3:
+	.asciz	"first"                         # string offset=85
+.Linfo_string4:
+	.asciz	"second"                        # string offset=91
+	.ident	"clang"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.addrsig_sym first
+	.addrsig_sym second
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:


        


More information about the llvm-commits mailing list