[llvm] 070090d - [lldb] Add option to show memory tags in memory read output

Wed Jan 26 06:40:45 PST 2022

Author: David Spickett
Date: 2022-01-26T14:40:39Z
New Revision: 070090d08eb506187f7203bd3b321ca1e74d9d5c

URL: https://github.com/llvm/llvm-project/commit/070090d08eb506187f7203bd3b321ca1e74d9d5c
DIFF: https://github.com/llvm/llvm-project/commit/070090d08eb506187f7203bd3b321ca1e74d9d5c.diff

LOG: [lldb] Add option to show memory tags in memory read output

This adds an option --show-tags to "memory read".

(lldb) memory read mte_buf mte_buf+32 -f "x" -s8 --show-tags
0x900fffff7ff8000: 0x0000000000000000 0x0000000000000000 (tag: 0x0)
0x900fffff7ff8010: 0x0000000000000000 0x0000000000000000 (tag: 0x1)

Tags are printed on the end of each line, if that
line has any tags associated with it. Meaning that
untagged memory output is unchanged.

Tags are printed based on the granule(s) of memory that
a line covers. So you may have lines with 1 tag, with many
tags, no tags or partially tagged lines.

In the case of partially tagged lines, untagged granules
will show "<no tag>" so that the ordering is obvious.
For example, a line that covers 2 granules where the first
is not tagged:

(lldb) memory read mte_buf-16 mte_buf+16 -l32 -f"x" --show-tags
0x900fffff7ff7ff0: 0x00000000 <...> (tags: <no tag> 0x0)

Untagged lines will just not have the "(tags: ..." at all.
Though they may be part of a larger output that does have
some tagged lines.

To do this I've extended DumpDataExtractor to also print
memory tags where it has a valid execution context and
is asked to print them.

There are no special alignment requirements, simply
use "memory read" as usual. All alignment is handled
in DumpDataExtractor.

We use MakeTaggedRanges to find all the tagged memory
in the current dump, then read all that into a MemoryTagMap.

The tag map is populated once in DumpDataExtractor and re-used
for each subsequently printed line (or recursive call of
DumpDataExtractor, which some formats do).

Reviewed By: omjavaid

Differential Revision: https://reviews.llvm.org/D107140

Added: 
    

Modified: 
    lldb/include/lldb/Core/DumpDataExtractor.h
    lldb/source/Commands/CommandObjectMemory.cpp
    lldb/source/Commands/Options.td
    lldb/source/Core/DumpDataExtractor.cpp
    lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
    llvm/docs/ReleaseNotes.rst

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Core/DumpDataExtractor.h b/lldb/include/lldb/Core/DumpDataExtractor.h
index 12188609e8c0..08501b512a2b 100644

--- a/lldb/include/lldb/Core/DumpDataExtractor.h
+++ b/lldb/include/lldb/Core/DumpDataExtractor.h
@@ -76,6 +76,15 @@ class Stream;
 ///     same integer value. If the items being displayed are not
 ///     bitfields, this value should be zero.
 ///
+/// \param[in] exe_scope
+///     If provided, this will be used to lookup language specific
+///     information, address information and memory tags.
+///     (if they are requested by the other options)
+///
+/// \param[in] show_memory_tags
+///     If exe_scope and base_addr are valid, include memory tags
+///     in the output. This does not apply to certain formats.
+///
 /// \return
 ///     The offset at which dumping ended.
 lldb::offset_t
@@ -83,7 +92,8 @@ DumpDataExtractor(const DataExtractor &DE, Stream *s, lldb::offset_t offset,
                   lldb::Format item_format, size_t item_byte_size,
                   size_t item_count, size_t num_per_line, uint64_t base_addr,
                   uint32_t item_bit_size, uint32_t item_bit_offset,
-                  ExecutionContextScope *exe_scope = nullptr);
+                  ExecutionContextScope *exe_scope = nullptr,
+                  bool show_memory_tags = false);
 
 void DumpHexBytes(Stream *s, const void *src, size_t src_len,
                   uint32_t bytes_per_line, lldb::addr_t base_addr);

diff  --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index f5e2cb4ed44e..1b44a1bd709a 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -91,6 +91,10 @@ class OptionGroupReadMemory : public OptionGroup {
       error = m_offset.SetValueFromString(option_value);
       break;
 
+    case '\x01':
+      m_show_tags = true;
+      break;
+
     default:
       llvm_unreachable("Unimplemented option");
     }
@@ -104,6 +108,7 @@ class OptionGroupReadMemory : public OptionGroup {
     m_force = false;
     m_offset.Clear();
     m_language_for_type.Clear();
+    m_show_tags = false;
   }
 
   Status FinalizeSettings(Target *target, OptionGroupFormat &format_options) {
@@ -277,6 +282,7 @@ class OptionGroupReadMemory : public OptionGroup {
   bool m_force;
   OptionValueUInt64 m_offset;
   OptionValueLanguage m_language_for_type;
+  bool m_show_tags = false;
 };
 
 // Read memory from the inferior process
@@ -860,7 +866,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
     size_t bytes_dumped = DumpDataExtractor(
         data, output_stream_p, 0, format, item_byte_size, item_count,
         num_per_line / target->GetArchitecture().GetDataByteSize(), addr, 0, 0,
-        exe_scope);
+        exe_scope, m_memory_options.m_show_tags);
     m_next_addr = addr + bytes_dumped;
     output_stream_p->EOL();
     return true;

diff  --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 7fbf0ab03995..269c937296d5 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -488,6 +488,8 @@ let Command = "memory read" in {
     "display data.">;
   def memory_read_force : Option<"force", "r">, Groups<[1,2,3]>,
     Desc<"Necessary if reading over target.max-memory-read-size bytes.">;
+  def memory_read_show_tags : Option<"show-tags", "\\x01">, Group<1>,
+    Desc<"Include memory tags in output (does not apply to binary output).">;
 }
 
 let Command = "memory find" in {

diff  --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp
index 175ffef04a81..211e16a2e033 100644
--- a/lldb/source/Core/DumpDataExtractor.cpp
+++ b/lldb/source/Core/DumpDataExtractor.cpp
@@ -17,6 +17,9 @@
 #include "lldb/Target/ABI.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/ExecutionContextScope.h"
+#include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Target/MemoryTagManager.h"
+#include "lldb/Target/MemoryTagMap.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/SectionLoadList.h"
 #include "lldb/Target/Target.h"
@@ -253,6 +256,85 @@ void DumpFloatingPoint(std::ostringstream &ss, FloatT f) {
   ss << f;
 }
 
+static llvm::Optional<MemoryTagMap>
+GetMemoryTags(lldb::addr_t addr, size_t length,
+              ExecutionContextScope *exe_scope) {
+  assert(addr != LLDB_INVALID_ADDRESS);
+
+  if (!exe_scope)
+    return llvm::None;
+
+  TargetSP target_sp = exe_scope->CalculateTarget();
+  if (!target_sp)
+    return llvm::None;
+
+  ProcessSP process_sp = target_sp->CalculateProcess();
+  if (!process_sp)
+    return llvm::None;
+
+  llvm::Expected<const MemoryTagManager *> tag_manager_or_err =
+      process_sp->GetMemoryTagManager();
+  if (!tag_manager_or_err) {
+    llvm::consumeError(tag_manager_or_err.takeError());
+    return llvm::None;
+  }
+
+  MemoryRegionInfos memory_regions;
+  // Don't check return status, list will be just empty if an error happened.
+  process_sp->GetMemoryRegions(memory_regions);
+
+  llvm::Expected<std::vector<MemoryTagManager::TagRange>> tagged_ranges_or_err =
+      (*tag_manager_or_err)
+          ->MakeTaggedRanges(addr, addr + length, memory_regions);
+  // Here we know that our range will not be inverted but we must still check
+  // for an error.
+  if (!tagged_ranges_or_err) {
+    llvm::consumeError(tagged_ranges_or_err.takeError());
+    return llvm::None;
+  }
+  if (tagged_ranges_or_err->empty())
+    return llvm::None;
+
+  MemoryTagMap memory_tag_map(*tag_manager_or_err);
+  for (const MemoryTagManager::TagRange &range : *tagged_ranges_or_err) {
+    llvm::Expected<std::vector<lldb::addr_t>> tags_or_err =
+        process_sp->ReadMemoryTags(range.GetRangeBase(), range.GetByteSize());
+
+    if (tags_or_err)
+      memory_tag_map.InsertTags(range.GetRangeBase(), *tags_or_err);
+    else
+      llvm::consumeError(tags_or_err.takeError());
+  }
+
+  if (memory_tag_map.Empty())
+    return llvm::None;
+
+  return memory_tag_map;
+}
+
+static void
+printMemoryTags(const DataExtractor &DE, Stream *s, lldb::addr_t addr,
+                size_t len,
+                const llvm::Optional<MemoryTagMap> &memory_tag_map) {
+  std::vector<llvm::Optional<lldb::addr_t>> tags =
+      memory_tag_map->GetTags(addr, len);
+
+  // Only print if there is at least one tag for this line
+  if (tags.empty())
+    return;
+
+  s->Printf(" (tag%s:", tags.size() > 1 ? "s" : "");
+  // Some granules may not be tagged but print something for them
+  // so that the ordering remains intact.
+  for (auto tag : tags) {
+    if (tag)
+      s->Printf(" 0x%" PRIx64, *tag);
+    else
+      s->PutCString(" <no tag>");
+  }
+  s->PutCString(")");
+}
+
 lldb::offset_t lldb_private::DumpDataExtractor(
     const DataExtractor &DE, Stream *s, offset_t start_offset,
     lldb::Format item_format, size_t item_byte_size, size_t item_count,
@@ -261,7 +343,7 @@ lldb::offset_t lldb_private::DumpDataExtractor(
                               // non-zero, the value is a bitfield
     uint32_t item_bit_offset, // If "item_bit_size" is non-zero, this is the
                               // shift amount to apply to a bitfield
-    ExecutionContextScope *exe_scope) {
+    ExecutionContextScope *exe_scope, bool show_memory_tags) {
   if (s == nullptr)
     return start_offset;
 
@@ -272,6 +354,11 @@ lldb::offset_t lldb_private::DumpDataExtractor(
 
   offset_t offset = start_offset;
 
+  llvm::Optional<MemoryTagMap> memory_tag_map = llvm::None;
+  if (show_memory_tags && base_addr != LLDB_INVALID_ADDRESS)
+    memory_tag_map =
+        GetMemoryTags(base_addr, DE.GetByteSize() - offset, exe_scope);
+
   if (item_format == eFormatInstruction)
     return DumpInstructions(DE, s, exe_scope, start_offset, base_addr,
                             item_count);
@@ -283,7 +370,10 @@ lldb::offset_t lldb_private::DumpDataExtractor(
   lldb::offset_t line_start_offset = start_offset;
   for (uint32_t count = 0; DE.ValidOffset(offset) && count < item_count;
        ++count) {
+    // If we are at the beginning or end of a line
+    // Note that the last line is handled outside this for loop.
     if ((count % num_per_line) == 0) {
+      // If we are at the end of a line
       if (count > 0) {
         if (item_format == eFormatBytesWithASCII &&
             offset > line_start_offset) {
@@ -295,6 +385,15 @@ lldb::offset_t lldb_private::DumpDataExtractor(
                             offset - line_start_offset, SIZE_MAX,
                             LLDB_INVALID_ADDRESS, 0, 0);
         }
+
+        if (base_addr != LLDB_INVALID_ADDRESS && memory_tag_map) {
+          size_t line_len = offset - line_start_offset;
+          lldb::addr_t line_base =
+              base_addr +
+              (offset - start_offset - line_len) / DE.getTargetByteSize();
+          printMemoryTags(DE, s, line_base, line_len, memory_tag_map);
+        }
+
         s->EOL();
       }
       if (base_addr != LLDB_INVALID_ADDRESS)
@@ -796,14 +895,28 @@ lldb::offset_t lldb_private::DumpDataExtractor(
     }
   }
 
-  if (item_format == eFormatBytesWithASCII && offset > line_start_offset) {
-    s->Printf("%*s", static_cast<int>(
-                         (num_per_line - (offset - line_start_offset)) * 3 + 2),
-              "");
-    DumpDataExtractor(DE, s, line_start_offset, eFormatCharPrintable, 1,
-                      offset - line_start_offset, SIZE_MAX,
-                      LLDB_INVALID_ADDRESS, 0, 0);
+  // If anything was printed we want to catch the end of the last line.
+  // Since we will exit the for loop above before we get a chance to append to
+  // it normally.
+  if (offset > line_start_offset) {
+    if (item_format == eFormatBytesWithASCII) {
+      s->Printf("%*s",
+                static_cast<int>(
+                    (num_per_line - (offset - line_start_offset)) * 3 + 2),
+                "");
+      DumpDataExtractor(DE, s, line_start_offset, eFormatCharPrintable, 1,
+                        offset - line_start_offset, SIZE_MAX,
+                        LLDB_INVALID_ADDRESS, 0, 0);
+    }
+
+    if (base_addr != LLDB_INVALID_ADDRESS && memory_tag_map) {
+      size_t line_len = offset - line_start_offset;
+      lldb::addr_t line_base = base_addr + (offset - start_offset - line_len) /
+                                               DE.getTargetByteSize();
+      printMemoryTags(DE, s, line_base, line_len, memory_tag_map);
+    }
   }
+
   return offset; // Return the offset at which we ended up
 }
 

diff  --git a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
index 983f283444a4..c95c2ae32753 100644
--- a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
+++ b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
@@ -286,3 +286,128 @@ def test_mte_tag_write(self):
                           "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x3 \(mismatch\)\n"
                           "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x3 \(mismatch\)\n"
                           "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$"])
+
+    @skipUnlessArch("aarch64")
+    @skipUnlessPlatform(["linux"])
+    @skipUnlessAArch64MTELinuxCompiler
+    def test_mte_memory_read_tag_display(self):
+        self.setup_mte_test()
+
+        # Reading from an untagged range should not be any 
diff erent
+        self.expect("memory read non_mte_buf non_mte_buf+16",
+                substrs=["tag"], matching=False)
+
+        # show-tags option is required
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 16",
+                patterns=["tag"], matching=False)
+
+        # Reading 16 bytes per line means 1 granule and so 1 tag per line
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+10: 0x0+ \(tag: 0x1\)"
+                    ])
+
+        # If bytes per line is > granule size then you get multiple tags
+        # per line.
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tags: 0x0 0x1\)\n"
+                    ])
+
+        # Reading half a granule still shows you the tag for that granule
+        self.expect("memory read mte_buf mte_buf+8 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    ])
+
+        # We can read a whole number of granules but split them over more lines
+        # than there are granules. Tags are shown repeated for each applicable line.
+        self.expect("memory read mte_buf+32 mte_buf+64 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+20: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+28: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+30: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+38: 0x0+ \(tag: 0x3\)"
+                    ])
+
+        # Also works if we misalign the start address. Note the first tag is shown
+        # only once here and we have a new tag on the last line.
+        # (bytes per line == the misalignment here)
+        self.expect("memory read mte_buf+32+8 mte_buf+64+8 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+28: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+30: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+38: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+40: 0x0+ \(tag: 0x4\)"
+                    ])
+
+        # We can do the same thing but where the misaligment isn't equal to
+        # bytes per line. This time, some lines cover multiple granules and
+        # so show multiple tags.
+        self.expect("memory read mte_buf+32+4 mte_buf+64+4 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+24: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+2c: 0x0+ \(tags: 0x2 0x3\)\n"
+                    "0x[0-9A-fa-f]+34: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+3c: 0x0+ \(tags: 0x3 0x4\)"
+                    ])
+
+        # If you read a range that includes non tagged areas those areas
+        # simply aren't annotated.
+
+        # Initial part of range is untagged
+        self.expect("memory read mte_buf-16 mte_buf+32 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+\n"
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+10: 0x0+ \(tag: 0x1\)"
+                    ])
+
+        # End of range is untagged
+        self.expect("memory read mte_buf+page_size-16 mte_buf+page_size+16 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+ \(tag: 0xf\)\n"
+                    "0x[0-9A-fa-f]+00: 0x0+"
+                    ])
+
+        # The smallest MTE range we can get is a single page so we just check
+        # parts of this result. Where we read from before the tagged page to after it.
+        # Add --force here because we're reading just over 4k.
+        self.expect(
+                "memory read mte_read_only-16 mte_read_only+page_size+16 -f \"x\" -l 1 -s 16 --force --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+\n"
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n",
+                    "0x[0-9A-fa-f]+f0: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+00: 0x0+"
+                    ])
+
+        # Some parts of a line might be tagged and others untagged.
+        # <no tag> is shown in where the tag would be, to keep the order intact.
+        self.expect("memory read mte_buf-16 mte_buf+32 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f0: 0x0+ \(tags: <no tag> 0x0\)"])
+        self.expect(
+                "memory read mte_read_only+page_size-16 mte_read_only+page_size+16 -f \"x\" -l 1 -s 32  --show-tags",
+                patterns=["0x[0-9A-fa-f]+f0: 0x0+ \(tags: 0x0 <no tag>\)"])
+
+        # Here the start address is unaligned so we cover 3 granules instead of 2
+        self.expect("memory read mte_buf-16+4 mte_buf+32+4 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f4: 0x0+ \(tags: <no tag> 0x0 0x1\)"])
+        self.expect(
+                "memory read mte_read_only+page_size-16+4 mte_read_only+page_size+16+4 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f4: 0x0+ \(tags: 0x0 <no tag> <no tag>\)"])
+
+        # Some formats call DumpDataExtractor multiple times,
+        # check that those print tags only once per line.
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" --show-tags",
+                patterns=["0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)\n",
+                          "0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"])
+
+        self.expect("memory read mte_buf mte_buf+32 -f \"bytes with ASCII\" --show-tags",
+                patterns=["0x[0-9A-fa-f]+00: (00 ){16} \.{16} \(tag: 0x0\)\n",
+                          "0x[0-9A-fa-f]+10: (00 ){16} \.{16} \(tag: 0x1\)"])
+
+        self.expect("memory read mte_buf mte_buf+32 -f \"uint8_t[]\" -s 16 -l 1 --show-tags",
+                patterns=["0x[0-9A-Fa-f]+00: \{(0x00 ){15}0x00\} \(tag: 0x0\)\n"
+                          "0x[0-9A-Fa-f]+10: \{(0x00 ){15}0x00\} \(tag: 0x1\)"])

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 15f7428a7fde..7718bcc95910 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -172,6 +172,9 @@ Changes to LLDB
   * ``memory tag read``
   * ``memory tag write``
 
+* The ``memory read`` command has a new option ``--show-tags``. Use this option
+  to show memory tags beside the contents of tagged memory ranges.
+
 Changes to Sanitizers
 ---------------------