[llvm] 17e4c21 - [Symbolizer] Implement contextual symbolizer markup elements.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 21 11:29:24 PDT 2022


Author: Daniel Thornburgh
Date: 2022-07-21T11:29:19-07:00
New Revision: 17e4c217b66305e60657a48f10fe3c428c2fe4d2

URL: https://github.com/llvm/llvm-project/commit/17e4c217b66305e60657a48f10fe3c428c2fe4d2
DIFF: https://github.com/llvm/llvm-project/commit/17e4c217b66305e60657a48f10fe3c428c2fe4d2.diff

LOG: [Symbolizer] Implement contextual symbolizer markup elements.

This change implements the contextual symbolizer markup elements: reset,
module, and mmap. These provide information about the runtime context of
the binary necessary to resolve addresses to symbolic values.

Summary information is printed to the output about this context.
Multiple mmap elements for the same module line are coalesced together.
The standard requires that such elements occur on their own lines to
allow for this; accordingly, anything after a contextual element on a
line is silently discarded.

Implementing this cleanly requires that the filter drive the parser;
this allows skipped sections to avoid being parsed. This also makes the
filter quite a bit easier to use, at the cost of some unused
flexibility.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D129519

Added: 
    llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test
    llvm/test/DebugInfo/symbolize-filter-markup-mmap.test
    llvm/test/DebugInfo/symbolize-filter-markup-module.test
    llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
    llvm/test/DebugInfo/symbolize-filter-markup-reset.test

Modified: 
    llvm/docs/CommandGuide/llvm-symbolizer.rst
    llvm/docs/SymbolizerMarkupFormat.rst
    llvm/include/llvm/DebugInfo/Symbolize/Markup.h
    llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
    llvm/lib/DebugInfo/Symbolize/Markup.cpp
    llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
    llvm/test/DebugInfo/symbolize-filter-markup-error-location.test
    llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 22ed6d9de00a8..33b5fa5b0fe1a 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -255,6 +255,9 @@ OPTIONS
   markup elements are supported:
 
   * ``{{symbol}}``
+  * ``{{reset}}``
+  * ``{{module}}``
+  * ``{{mmap}}``
 
 .. _llvm-symbolizer-opt-f:
 

diff  --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index dfd9d6b5b7706..95ac5d89d84e7 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -360,7 +360,7 @@ elements should have appeared somewhere earlier in the logging stream. It should
 always be possible for the symbolizing filter to be implemented as a single pass
 over the raw logging stream, accumulating context and massaging text as it goes.
 
-``{{{reset}}}`` [#not_yet_implemented]_
+``{{{reset}}}``
 
   This should be output before any other contextual element. The need for this
   contextual element is to support implementations that handle logs coming from
@@ -372,7 +372,7 @@ over the raw logging stream, accumulating context and massaging text as it goes.
   previous process's contextual elements is not assumed for new process that
   just happens have the same identifying information.
 
-``{{{module:%i:%s:%s:...}}}`` [#not_yet_implemented]_
+``{{{module:%i:%s:%s:...}}}``
 
   This element represents a so-called "module". A "module" is a single linked
   binary, such as a loaded ELF file. Usually each module occupies a contiguous
@@ -399,7 +399,7 @@ over the raw logging stream, accumulating context and massaging text as it goes.
 
     {{{module:1:libc.so:elf:83238ab56ba10497}}}
 
-``{{{mmap:%p:%i:...}}}`` [#not_yet_implemented]_
+``{{{mmap:%p:%i:...}}}``
 
   This contextual element is used to give information about a particular region
   in memory. ``%p`` is the starting address and ``%i`` gives the size in hex of the

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
index 2628b47cf6d3e..4f2b0de481ece 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -84,6 +84,10 @@ class MarkupParser {
   /// \returns the next markup node or None if none remain.
   Optional<MarkupNode> nextNode();
 
+  bool isSGR(const MarkupNode &Node) const {
+    return SGRSyntax.match(Node.Text);
+  }
+
 private:
   Optional<MarkupNode> parseElement(StringRef Line);
   void parseTextOutsideMarkup(StringRef Text);

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index b7d70ccafe66d..a224c3205f0bc 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -17,6 +17,9 @@
 
 #include "Markup.h"
 
+#include <map>
+
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -29,45 +32,106 @@ class MarkupFilter {
 public:
   MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled = llvm::None);
 
-  /// Begins a logical \p Line of markup.
-  ///
-  /// This must be called for each line of the input stream before calls to
-  /// filter() for elements of that line. The provided \p Line must be the same
-  /// one that was passed to parseLine() to produce the elements to be later
-  /// passed to filter().
+  /// Filters a line containing symbolizer markup and writes the human-readable
+  /// results to the output stream.
   ///
-  /// This informs the filter that a new line is beginning and establishes a
-  /// context for error location reporting.
-  void beginLine(StringRef Line);
+  /// Invalid or unimplemented markup elements are removed. Some output may be
+  /// deferred until a future filter() or finish() call.
+  void filter(StringRef Line);
 
-  /// Handle a \p Node of symbolizer markup.
-  ///
-  /// If the node is a recognized, valid markup element, it is replaced with a
-  /// human-readable string. If the node isn't an element or the element isn't
-  /// recognized, it is output verbatim. If the element is recognized but isn't
-  /// valid, it is omitted from the output.
-  void filter(const MarkupNode &Node);
+  /// Records that the input stream has ended and writes any deferred output.
+  void finish();
 
 private:
+  struct Module {
+    uint64_t ID;
+    std::string Name;
+    SmallVector<uint8_t> BuildID;
+  };
+
+  struct MMap {
+    uint64_t Addr;
+    uint64_t Size;
+    const Module *Module;
+    std::string Mode; // Lowercase
+    uint64_t ModuleRelativeAddr;
+
+    bool contains(uint64_t Addr) const;
+  };
+
+  // An informational module line currently being constructed. As many mmap
+  // elements as possible are folded into one ModuleInfo line.
+  struct ModuleInfoLine {
+    const Module *Module;
+
+    SmallVector<const MMap *> MMaps = {};
+  };
+
+  bool tryContextualElement(const MarkupNode &Node,
+                            const SmallVector<MarkupNode> &DeferredNodes);
+  bool tryMMap(const MarkupNode &Element,
+               const SmallVector<MarkupNode> &DeferredNodes);
+  bool tryReset(const MarkupNode &Element,
+                const SmallVector<MarkupNode> &DeferredNodes);
+  bool tryModule(const MarkupNode &Element,
+                 const SmallVector<MarkupNode> &DeferredNodes);
+
+  void beginModuleInfoLine(const Module *M);
+  void endAnyModuleInfoLine();
+
+  void filterNode(const MarkupNode &Node);
+
+  bool tryPresentation(const MarkupNode &Node);
+  bool trySymbol(const MarkupNode &Node);
+
   bool trySGR(const MarkupNode &Node);
 
   void highlight();
+  void highlightValue();
   void restoreColor();
   void resetColor();
 
+  Optional<Module> parseModule(const MarkupNode &Element) const;
+  Optional<MMap> parseMMap(const MarkupNode &Element) const;
+
+  Optional<uint64_t> parseAddr(StringRef Str) const;
+  Optional<uint64_t> parseModuleID(StringRef Str) const;
+  Optional<uint64_t> parseSize(StringRef Str) const;
+  Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
+  Optional<std::string> parseMode(StringRef Str) const;
+
   bool checkTag(const MarkupNode &Node) const;
-  bool checkNumFields(const MarkupNode &Node, size_t Size) const;
+  bool checkNumFields(const MarkupNode &Element, size_t Size) const;
+  bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const;
 
   void reportTypeError(StringRef Str, StringRef TypeName) const;
   void reportLocation(StringRef::iterator Loc) const;
 
+  const MMap *overlappingMMap(const MMap &Map) const;
+
+  StringRef lineEnding() const;
+
   raw_ostream &OS;
   const bool ColorsEnabled;
 
+  MarkupParser Parser;
+
+  // Current line being filtered.
   StringRef Line;
 
+  // A module info line currently being built. This incorporates as much mmap
+  // information as possible before being emitted.
+  Optional<ModuleInfoLine> MIL;
+
+  // SGR state.
   Optional<raw_ostream::Colors> Color;
   bool Bold = false;
+
+  // Map from Module ID to Module.
+  DenseMap<uint64_t, std::unique_ptr<Module>> Modules;
+
+  // Ordered map from starting address to mmap.
+  std::map<uint64_t, MMap> MMaps;
 };
 
 } // end namespace symbolize

diff  --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
index 9bc65e763287f..aa8a89812227f 100644
--- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -100,6 +100,9 @@ Optional<MarkupNode> MarkupParser::nextNode() {
 }
 
 void MarkupParser::flush() {
+  Buffer.clear();
+  NextIdx = 0;
+  Line = {};
   if (InProgressMultiline.empty())
     return;
   FinishedMultiline.swap(InProgressMultiline);

diff  --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 3363fe5e531fa..ad21df62b9b36 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -10,14 +10,22 @@
 /// This file defines the implementation of a filter that replaces symbolizer
 /// markup with human-readable expressions.
 ///
+/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
+///
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
 
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -28,30 +36,195 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled)
     : OS(OS), ColorsEnabled(ColorsEnabled.value_or(
                   WithColor::defaultAutoDetectFunction()(OS))) {}
 
-void MarkupFilter::beginLine(StringRef Line) {
+void MarkupFilter::filter(StringRef Line) {
   this->Line = Line;
   resetColor();
+
+  Parser.parseLine(Line);
+  SmallVector<MarkupNode> DeferredNodes;
+  // See if the line is a contextual line (i.e. contains a contextual element).
+  // In this case, anything after the contextual element is elided, or the whole
+  // line may be elided.
+  while (Optional<MarkupNode> Node = Parser.nextNode()) {
+    // If this was a contextual line, then summarily stop processing.
+    if (tryContextualElement(*Node, DeferredNodes))
+      return;
+    // This node may yet be part of an elided contextual line.
+    DeferredNodes.push_back(*Node);
+  }
+
+  // This was not a contextual line, so nothing in it should be elided.
+  endAnyModuleInfoLine();
+  for (const MarkupNode &Node : DeferredNodes)
+    filterNode(Node);
 }
 
-void MarkupFilter::filter(const MarkupNode &Node) {
-  if (!checkTag(Node))
-    return;
+void MarkupFilter::finish() {
+  Parser.flush();
+  while (Optional<MarkupNode> Node = Parser.nextNode())
+    filterNode(*Node);
+  endAnyModuleInfoLine();
+  resetColor();
+  Modules.clear();
+  MMaps.clear();
+}
 
-  if (trySGR(Node))
-    return;
+// See if the given node is a contextual element and handle it if so. This may
+// either output or defer the element; in the former case, it will first emit
+// any DeferredNodes.
+//
+// Returns true if the given element was a contextual element. In this case,
+// DeferredNodes should be considered handled and should not be emitted. The
+// rest of the containing line must also be ignored in case the element was
+// deferred to a following line.
+bool MarkupFilter::tryContextualElement(
+    const MarkupNode &Node, const SmallVector<MarkupNode> &DeferredNodes) {
+  if (tryMMap(Node, DeferredNodes))
+    return true;
+  if (tryReset(Node, DeferredNodes))
+    return true;
+  return tryModule(Node, DeferredNodes);
+}
 
-  if (Node.Tag == "symbol") {
-    if (!checkNumFields(Node, 1))
-      return;
+bool MarkupFilter::tryMMap(const MarkupNode &Node,
+                           const SmallVector<MarkupNode> &DeferredNodes) {
+  if (Node.Tag != "mmap")
+    return false;
+  Optional<MMap> ParsedMMap = parseMMap(Node);
+  if (!ParsedMMap)
+    return true;
+
+  if (const MMap *M = overlappingMMap(*ParsedMMap)) {
+    WithColor::error(errs())
+        << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Module->ID,
+                   M->Addr, M->Addr + M->Size);
+    reportLocation(Node.Fields[0].begin());
+    return true;
+  }
+
+  auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap));
+  assert(Res.second && "Overlap check should ensure emplace succeeds.");
+  MMap &MMap = Res.first->second;
+
+  if (!MIL || MIL->Module != MMap.Module) {
+    endAnyModuleInfoLine();
+    for (const MarkupNode &Node : DeferredNodes)
+      filterNode(Node);
+    beginModuleInfoLine(MMap.Module);
+    OS << "; adds";
+  }
+  MIL->MMaps.push_back(&MMap);
+  return true;
+}
+
+bool MarkupFilter::tryReset(const MarkupNode &Node,
+                            const SmallVector<MarkupNode> &DeferredNodes) {
+  if (Node.Tag != "reset")
+    return false;
+  if (!checkNumFields(Node, 0))
+    return true;
+
+  if (!Modules.empty() || !MMaps.empty()) {
+    Modules.clear();
+    MMaps.clear();
+
+    endAnyModuleInfoLine();
+    for (const MarkupNode &Node : DeferredNodes)
+      filterNode(Node);
     highlight();
-    OS << llvm::demangle(Node.Fields.front().str());
+    OS << "[[[reset]]]" << lineEnding();
     restoreColor();
+  }
+  return true;
+}
+
+bool MarkupFilter::tryModule(const MarkupNode &Node,
+                             const SmallVector<MarkupNode> &DeferredNodes) {
+  if (Node.Tag != "module")
+    return false;
+  Optional<Module> ParsedModule = parseModule(Node);
+  if (!ParsedModule)
+    return true;
+
+  auto Res = Modules.try_emplace(
+      ParsedModule->ID, std::make_unique<Module>(std::move(*ParsedModule)));
+  if (!Res.second) {
+    WithColor::error(errs()) << "duplicate module ID\n";
+    reportLocation(Node.Fields[0].begin());
+    return true;
+  }
+  Module &Module = *Res.first->second;
+
+  endAnyModuleInfoLine();
+  for (const MarkupNode &Node : DeferredNodes)
+    filterNode(Node);
+  beginModuleInfoLine(&Module);
+  OS << "; BuildID=";
+  highlightValue();
+  OS << toHex(Module.BuildID, /*LowerCase=*/true);
+  highlight();
+  return true;
+}
+
+void MarkupFilter::beginModuleInfoLine(const Module *M) {
+  highlight();
+  OS << "[[[ELF module";
+  highlightValue();
+  OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name);
+  highlight();
+  MIL = ModuleInfoLine{M};
+}
+
+void MarkupFilter::endAnyModuleInfoLine() {
+  if (!MIL)
     return;
+  llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) {
+    return A->Addr < B->Addr;
+  });
+  for (const MMap *M : MIL->MMaps) {
+    OS << (M == MIL->MMaps.front() ? ' ' : '-');
+    highlightValue();
+    OS << formatv("{0:x}", M->Addr);
+    highlight();
+    OS << '(';
+    highlightValue();
+    OS << M->Mode;
+    highlight();
+    OS << ')';
   }
+  OS << "]]]" << lineEnding();
+  restoreColor();
+  MIL.reset();
+}
+
+// Handle a node that is known not to be a contextual element.
+void MarkupFilter::filterNode(const MarkupNode &Node) {
+  if (!checkTag(Node))
+    return;
+  if (tryPresentation(Node))
+    return;
+  if (trySGR(Node))
+    return;
 
   OS << Node.Text;
 }
 
+bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
+  return trySymbol(Node);
+}
+
+bool MarkupFilter::trySymbol(const MarkupNode &Node) {
+  if (Node.Tag != "symbol")
+    return false;
+  if (!checkNumFields(Node, 1))
+    return true;
+
+  highlight();
+  OS << llvm::demangle(Node.Fields.front().str());
+  restoreColor();
+  return true;
+}
+
 bool MarkupFilter::trySGR(const MarkupNode &Node) {
   if (Node.Text == "\033[0m") {
     resetColor();
@@ -93,6 +266,13 @@ void MarkupFilter::highlight() {
                  Bold);
 }
 
+// Begin highlighting a field within a highlighted markup string.
+void MarkupFilter::highlightValue() {
+  if (!ColorsEnabled)
+    return;
+  OS.changeColor(raw_ostream::Colors::GREEN, Bold);
+}
+
 // Set the output stream's color to the current color and bold state of the SGR
 // abstract machine.
 void MarkupFilter::restoreColor() {
@@ -117,6 +297,139 @@ void MarkupFilter::resetColor() {
     OS.resetColor();
 }
 
+// This macro helps reduce the amount of indirection done through Optional
+// below, since the usual case upon receiving a None Optional is to return None.
+#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR)                                \
+  auto NAME##Opt = (EXPR);                                                     \
+  if (!NAME##Opt)                                                              \
+    return None;                                                               \
+  TYPE NAME = std::move(*NAME##Opt)
+
+Optional<MarkupFilter::Module>
+MarkupFilter::parseModule(const MarkupNode &Element) const {
+  if (!checkNumFieldsAtLeast(Element, 3))
+    return None;
+  ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0]));
+  StringRef Name = Element.Fields[1];
+  StringRef Type = Element.Fields[2];
+  if (Type != "elf") {
+    WithColor::error() << "unknown module type\n";
+    reportLocation(Type.begin());
+    return None;
+  }
+  if (!checkNumFields(Element, 4))
+    return None;
+  ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID,
+                        parseBuildID(Element.Fields[3]));
+  return Module{ID, Name.str(), std::move(BuildID)};
+}
+
+Optional<MarkupFilter::MMap>
+MarkupFilter::parseMMap(const MarkupNode &Element) const {
+  if (!checkNumFieldsAtLeast(Element, 3))
+    return None;
+  ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0]));
+  ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1]));
+  StringRef Type = Element.Fields[2];
+  if (Type != "load") {
+    WithColor::error() << "unknown mmap type\n";
+    reportLocation(Type.begin());
+    return None;
+  }
+  if (!checkNumFields(Element, 6))
+    return None;
+  ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3]));
+  ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4]));
+  auto It = Modules.find(ID);
+  if (It == Modules.end()) {
+    WithColor::error() << "unknown module ID\n";
+    reportLocation(Element.Fields[3].begin());
+    return None;
+  }
+  ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr,
+                        parseAddr(Element.Fields[5]));
+  return MMap{Addr, Size, It->second.get(), std::move(Mode),
+              ModuleRelativeAddr};
+}
+
+// Parse an address (%p in the spec).
+Optional<uint64_t> MarkupFilter::parseAddr(StringRef Str) const {
+  if (Str.empty()) {
+    reportTypeError(Str, "address");
+    return None;
+  }
+  if (all_of(Str, [](char C) { return C == '0'; }))
+    return 0;
+  if (!Str.startswith("0x")) {
+    reportTypeError(Str, "address");
+    return None;
+  }
+  uint64_t Addr;
+  if (Str.drop_front(2).getAsInteger(16, Addr)) {
+    reportTypeError(Str, "address");
+    return None;
+  }
+  return Addr;
+}
+
+// Parse a module ID (%i in the spec).
+Optional<uint64_t> MarkupFilter::parseModuleID(StringRef Str) const {
+  uint64_t ID;
+  if (Str.getAsInteger(0, ID)) {
+    reportTypeError(Str, "module ID");
+    return None;
+  }
+  return ID;
+}
+
+// Parse a size (%i in the spec).
+Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const {
+  uint64_t ID;
+  if (Str.getAsInteger(0, ID)) {
+    reportTypeError(Str, "size");
+    return None;
+  }
+  return ID;
+}
+
+// Parse a build ID (%x in the spec).
+Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const {
+  std::string Bytes;
+  if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) {
+    reportTypeError(Str, "build ID");
+    return None;
+  }
+  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+                            Bytes.size());
+  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
+// Parses the mode string for an mmap element.
+Optional<std::string> MarkupFilter::parseMode(StringRef Str) const {
+  if (Str.empty()) {
+    reportTypeError(Str, "mode");
+    return None;
+  }
+
+  // Pop off each of r/R, w/W, and x/X from the front, in that order.
+  StringRef Remainder = Str;
+  if (!Remainder.empty() && tolower(Remainder.front()) == 'r')
+    Remainder = Remainder.drop_front();
+  if (!Remainder.empty() && tolower(Remainder.front()) == 'w')
+    Remainder = Remainder.drop_front();
+  if (!Remainder.empty() && tolower(Remainder.front()) == 'x')
+    Remainder = Remainder.drop_front();
+
+  // If anything remains, then the string wasn't a mode.
+  if (!Remainder.empty()) {
+    reportTypeError(Str, "mode");
+    return None;
+  }
+
+  // Normalize the mode.
+  return Str.lower();
+}
+
 bool MarkupFilter::checkTag(const MarkupNode &Node) const {
   if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) {
     WithColor::error(errs()) << "tags must be all lowercase characters\n";
@@ -126,18 +439,66 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const {
   return true;
 }
 
-bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const {
-  if (Node.Fields.size() != Size) {
+bool MarkupFilter::checkNumFields(const MarkupNode &Element,
+                                  size_t Size) const {
+  if (Element.Fields.size() != Size) {
     WithColor::error(errs()) << "expected " << Size << " fields; found "
-                             << Node.Fields.size() << "\n";
-    reportLocation(Node.Tag.end());
+                             << Element.Fields.size() << "\n";
+    reportLocation(Element.Tag.end());
     return false;
   }
   return true;
 }
 
+bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element,
+                                         size_t Size) const {
+  if (Element.Fields.size() < Size) {
+    WithColor::error(errs())
+        << "expected at least " << Size << " fields; found "
+        << Element.Fields.size() << "\n";
+    reportLocation(Element.Tag.end());
+    return false;
+  }
+  return true;
+}
+
+void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
+  WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str
+                           << "'\n";
+  reportLocation(Str.begin());
+}
+
+// Prints two lines that point out the given location in the current Line using
+// a caret. The iterator must be within the bounds of the most recent line
+// passed to filter().
 void MarkupFilter::reportLocation(StringRef::iterator Loc) const {
   errs() << Line;
   WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^';
   errs() << '\n';
 }
+
+// Returns a pointer to an existing mmap that overlaps the given one, or nullptr
+// if there is none.
+const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const {
+  // If the given map contains the start of another mmap, they overlap.
+  auto I = MMaps.upper_bound(Map.Addr);
+  if (I != MMaps.end() && Map.contains(I->second.Addr))
+    return &I->second;
+
+  // If no element starts inside the given mmap, the only possible overlap would
+  // be if the preceding mmap contains the start point of the given mmap.
+  if (I != MMaps.begin()) {
+    --I;
+    if (I->second.contains(Map.Addr))
+      return &I->second;
+  }
+  return nullptr;
+}
+
+StringRef MarkupFilter::lineEnding() const {
+  return Line.endswith("\r\n") ? "\r\n" : "\n";
+}
+
+bool MarkupFilter::MMap::contains(uint64_t Addr) const {
+  return this->Addr <= Addr && Addr < this->Addr + Size;
+}

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test b/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test
new file mode 100644
index 0000000000000..af187f8954bd4
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test
@@ -0,0 +1,12 @@
+RUN: split-file %s %t
+RUN: llvm-symbolizer --filter-markup < %t/log | \
+RUN:   FileCheck --match-full-lines --implicit-check-not {{.}} \
+RUN:     --strict-whitespace %s
+
+CHECK:keep[[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab 0x0(r)[[END:\]{3}]]
+CHECK:keep[[BEGIN]]ELF module #0x1 "b.o"; BuildID=cd[[END]]
+
+;--- log
+keep{{{module:0:a.o:elf:ab}}}skip
+skip{{{mmap:0:1:load:0:r:0}}}skip
+keep{{{module:1:b.o:elf:cd}}}skip

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test b/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test
index 4d05bfd39ca99..400131d9e549b 100644
--- a/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test
@@ -1,5 +1,5 @@
 RUN: split-file %s %t
-RUN: llvm-symbolizer --debug-file-directory=%p/Inputs --filter-markup < %t/log > /dev/null 2> %t.err
+RUN: llvm-symbolizer --filter-markup < %t/log > /dev/null 2> %t.err
 RUN: FileCheck %s -input-file=%t.err --match-full-lines --strict-whitespace
 
 CHECK:error: expected 1 fields; found 0

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test b/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test
new file mode 100644
index 0000000000000..506d7a926baea
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test
@@ -0,0 +1,40 @@
+RUN: split-file %s %t
+RUN: llvm-symbolizer --filter-markup < %t/log > %t.out 2> %t.err
+RUN: FileCheck %s --input-file=%t.out --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abb50d82b6bdc861 0x0(rwx)-0x1(r)-0x2(w)-0x3(x)-0x4(rwx)-0xa(r)[[END:\]{3}]]
+
+ERR: error: expected at least 3 fields; found 0
+ERR: error: unknown mmap type
+ERR: error: expected 6 fields; found 3
+ERR: error: expected address; found '1'
+ERR: error: expected size; found '-1'
+ERR: error: expected mode; found ''
+ERR: error: expected mode; found 'g'
+ERR: error: expected mode; found 'wr'
+ERR: error: overlapping mmap: #0x0 [0xa,0xc)
+ERR: error: overlapping mmap: #0x0 [0xa,0xc)
+ERR: error: overlapping mmap: #0x0 [0xa,0xc)
+
+;--- log
+{{{module:0:a.o:elf:abb50d82b6bdc861}}}
+{{{mmap:0x1:1:load:0:r:0}}}
+{{{mmap:0x2:1:load:0:w:0}}}
+{{{mmap:0x3:1:load:0:x:0}}}
+{{{mmap:0x4:1:load:0:rwx:0}}}
+{{{mmap:0x0:1:load:0:RWX:0}}}
+{{{mmap:0xa:2:load:0:r:0}}}
+
+{{{mmap}}}
+{{{mmap:0:1:unknown}}}
+{{{mmap:0:10000000:load}}}
+{{{mmap:1:10000000:load:0:r:0}}}
+{{{mmap:0:-1:load:0:r:0}}}
+{{{mmap:0:10000000:load:0::0}}}
+{{{mmap:0:10000000:load:0:g:0}}}
+{{{mmap:0:10000000:load:0:wr:0}}}
+{{{mmap:0xa:1:load:0:r:0}}}
+{{{mmap:0x9:2:load:0:r:0}}}
+{{{mmap:0x9:5:load:0:r:0}}}

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-module.test b/llvm/test/DebugInfo/symbolize-filter-markup-module.test
new file mode 100644
index 0000000000000..74d6347941dc0
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-module.test
@@ -0,0 +1,26 @@
+RUN: split-file %s %t
+RUN: llvm-symbolizer --filter-markup < %t/log > %t.out 2> %t.err
+RUN: FileCheck %s --input-file=%t.out --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab[[END:\]{3}]]
+CHECK: [[BEGIN]]ELF module #0x1 "b.o"; BuildID=abb50d82b6bdc861[[END]]
+CHECK: [[BEGIN]]ELF module #0x2 "c.o"; BuildID=cd[[END]]
+CHECK: [[BEGIN]]ELF module #0x1 "b.o"; adds 0x0(r)[[END]]
+
+ERR: error: expected at least 3 fields; found 0
+ERR: error: unknown module type
+ERR: error: duplicate module ID
+ERR: error: expected 4 fields; found 3
+
+;--- log
+{{{module:0:a.o:elf:ab}}}
+{{{module:1:b.o:elf:abb50d82b6bdc861}}}
+{{{module:2:c.o:elf:cd}}}
+{{{mmap:0:10000000:load:1:r:0}}}
+
+{{{module}}}
+{{{module:3:d.o:foo}}}
+{{{module:0:d.o:elf:ef}}}
+{{{module:4:d.o:elf}}}

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
new file mode 100644
index 0000000000000..13e1d7f786c48
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
@@ -0,0 +1,44 @@
+RUN: split-file %s %t
+RUN: llvm-symbolizer --filter-markup < %t/log 2> %t.err
+RUN: FileCheck %s -input-file=%t.err --match-full-lines
+
+CHECK-NOT: '0x4f'
+CHECK-NOT: '00'
+CHECK: error: expected address; found ''
+CHECK: error: expected address; found '42'
+CHECK: error: expected address; found '0xgg'
+
+CHECK-NOT: '0'
+CHECK: error: expected module ID; found ''
+CHECK: error: expected module ID; found '-1'
+CHECK-NOT: '077'
+CHECK: error: expected module ID; found '079'
+CHECK-NOT: '0xff'
+CHECK: error: expected module ID; found '0xfg'
+CHECK: error: expected module ID; found '0x'
+
+CHECK: error: expected build ID; found ''
+CHECK: error: expected build ID; found '0'
+CHECK-NOT: '0xff'
+CHECK: error: expected build ID; found 'fg'
+
+;--- log
+{{{mmap:0x4f:1:unknown}}}
+{{{mmap:00:1:unknown}}}
+{{{mmap::1:unknown}}}
+{{{mmap:42:1:unknown}}}
+{{{mmap:0xgg:1:unknown}}}
+
+{{{module:0::elf:00}}}
+{{{module:::elf:00}}}
+{{{module:-1::elf:00}}}
+{{{module:077::elf:00}}}
+{{{module:079::elf:00}}}
+{{{module:0xff::elf:00}}}
+{{{module:0xfg::elf:00}}}
+{{{module:0x::elf:00}}}
+
+{{{module:1::elf:}}}
+{{{module:2::elf:0}}}
+{{{module:3::elf:ff}}}
+{{{module:4::elf:fg}}}

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-reset.test b/llvm/test/DebugInfo/symbolize-filter-markup-reset.test
new file mode 100644
index 0000000000000..1abb90582dfea
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-reset.test
@@ -0,0 +1,21 @@
+RUN: split-file %s %t
+RUN: llvm-symbolizer --filter-markup < %t/log > %t.out 2> %t.err
+RUN: FileCheck %s --input-file=%t.out --match-full-lines \
+RUN:   --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab 0x0(r)[[END:\]{3}]]
+CHECK: {{  }}[[BEGIN]]reset[[END]]
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "b.o"; BuildID=cd 0x1(r)[[END:\]{3}]]
+
+ERR: error: expected 0 fields; found 1
+
+;--- log
+  {{{reset}}}
+{{{module:0:a.o:elf:ab}}}
+{{{mmap:0:1:load:0:r:0}}}
+  {{{reset}}}
+{{{module:0:b.o:elf:cd}}}
+{{{mmap:0x1:1:load:0:r:0}}}
+
+{{{reset:}}}

diff  --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index b782c7a1720ab..fb223d1ee8a90 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -365,20 +365,14 @@ static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args,
   return BuildID;
 }
 
-// Symbolize the markup from stdin and write the result to stdout.
+// Symbolize markup from stdin and write the result to stdout.
 static void filterMarkup(const opt::InputArgList &Args) {
-  MarkupParser Parser;
   MarkupFilter Filter(outs(), parseColorArg(Args));
   for (std::string InputString; std::getline(std::cin, InputString);) {
     InputString += '\n';
-    Parser.parseLine(InputString);
-    Filter.beginLine(InputString);
-    while (Optional<MarkupNode> Element = Parser.nextNode())
-      Filter.filter(*Element);
+    Filter.filter(InputString);
   }
-  Parser.flush();
-  while (Optional<MarkupNode> Element = Parser.nextNode())
-    Filter.filter(*Element);
+  Filter.finish();
 }
 
 ExitOnError ExitOnErr;


        


More information about the llvm-commits mailing list