[Lldb-commits] [lldb] ade59d5 - [trace] Dedup different source lines when dumping instructions + refactor

Walter Erquinigo via lldb-commits lldb-commits at lists.llvm.org
Tue May 4 19:42:04 PDT 2021


Author: Walter Erquinigo
Date: 2021-05-04T19:40:52-07:00
New Revision: ade59d530964e28498051ab20e44cbf6594be595

URL: https://github.com/llvm/llvm-project/commit/ade59d530964e28498051ab20e44cbf6594be595
DIFF: https://github.com/llvm/llvm-project/commit/ade59d530964e28498051ab20e44cbf6594be595.diff

LOG: [trace] Dedup different source lines when dumping instructions + refactor

When dumping the traced instructions in a for loop, like this one

  4:  for (int a = 0; a < n; a++)
  5:    do something;

there might be multiple LineEntry objects for line 4, but with different address ranges. This was causing the dump command to dump something like this:

```
  a.out`main + 11 at main.cpp:4
    [1] 0x0000000000400518    movl   $0x0, -0x8(%rbp)
    [2] 0x000000000040051f    jmp    0x400529                  ; <+28> at main.cpp:4
  a.out`main + 28 at main.cpp:4
    [3] 0x0000000000400529    cmpl   $0x3, -0x8(%rbp)
    [4] 0x000000000040052d    jle    0x400521                  ; <+20> at main.cpp:5
```

which is confusing, as main.cpp:4 appears twice consecutively.

This diff fixes that issue by making the line entry comparison strictly about the line, column and file name. Before it was also comparing the address ranges, which we don't need because our output is strictly about what the user sees in the source.

Besides, I've noticed that the logic that traverses instructions and calculates symbols and disassemblies had too much coupling, and made my changes harder to implement, so I decided to decouple it. Now there are two methods for iterating over the instruction of a trace. The existing one does it on raw load addresses, but the one provides a SymbolContext and an InstructionSP, and does the calculations efficiently (not as efficient as possible for now though), so the caller doesn't need to care about these details. I think I'll be using that iterator to reconstruct the call stacks.

I was able to fix a test with this change.

Differential Revision: https://reviews.llvm.org/D100740

Added: 
    

Modified: 
    lldb/include/lldb/Core/AddressRange.h
    lldb/include/lldb/Target/Trace.h
    lldb/source/Core/AddressRange.cpp
    lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
    lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
    lldb/source/Target/Trace.cpp
    lldb/test/API/commands/trace/TestTraceStartStop.py

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h
index 8ccf96a436a1..d7017840197d 100644
--- a/lldb/include/lldb/Core/AddressRange.h
+++ b/lldb/include/lldb/Core/AddressRange.h
@@ -94,8 +94,7 @@ class AddressRange {
   /// \return
   ///     Returns \b true if \a so_addr is contained in this range,
   ///     \b false otherwise.
-  //    bool
-  //    Contains (const Address &so_addr) const;
+  bool Contains(const Address &so_addr) const;
 
   /// Check if a section offset address is contained in this range.
   ///

diff  --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h
index 403b1f868a59..81585fa7be40 100644
--- a/lldb/include/lldb/Target/Trace.h
+++ b/lldb/include/lldb/Target/Trace.h
@@ -210,8 +210,9 @@ class Trace : public PluginInterface,
   ///     The thread whose trace will be inspected.
   ///
   /// \return
-  ///     The total number of instructions in the trace.
-  virtual size_t GetInstructionCount(Thread &thread) = 0;
+  ///     The total number of instructions in the trace, or \a llvm::None if the
+  ///     thread is not being traced.
+  virtual llvm::Optional<size_t> GetInstructionCount(Thread &thread) = 0;
 
   /// Check if a thread is currently traced by this object.
   ///

diff  --git a/lldb/source/Core/AddressRange.cpp b/lldb/source/Core/AddressRange.cpp
index 0868ac5e0888..aeb99f857a78 100644
--- a/lldb/source/Core/AddressRange.cpp
+++ b/lldb/source/Core/AddressRange.cpp
@@ -8,6 +8,7 @@
 
 #include "lldb/Core/AddressRange.h"
 #include "lldb/Core/Module.h"
+#include "lldb/Core/Section.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/FileSpec.h"
@@ -42,14 +43,22 @@ AddressRange::AddressRange(const Address &so_addr, addr_t byte_size)
 
 AddressRange::~AddressRange() {}
 
-// bool
-// AddressRange::Contains (const Address &addr) const
-//{
-//    const addr_t byte_size = GetByteSize();
-//    if (byte_size)
-//        return addr.GetSection() == m_base_addr.GetSection() &&
-//        (addr.GetOffset() - m_base_addr.GetOffset()) < byte_size;
-//}
+bool AddressRange::Contains(const Address &addr) const {
+  SectionSP range_sect_sp = GetBaseAddress().GetSection();
+  SectionSP addr_sect_sp = addr.GetSection();
+  if (range_sect_sp) {
+    if (!addr_sect_sp ||
+        range_sect_sp->GetModule() != addr_sect_sp->GetModule())
+      return false; // Modules do not match.
+  } else if (addr_sect_sp) {
+    return false; // Range has no module but "addr" does because addr has a
+                  // section
+  }
+  // Either the modules match, or both have no module, so it is ok to compare
+  // the file addresses in this case only.
+  return ContainsFileAddress(addr);
+}
+
 //
 // bool
 // AddressRange::Contains (const Address *addr) const

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
index 96c4a89bd1cb..b8b8012fc86a 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -122,11 +122,11 @@ void TraceIntelPT::TraverseInstructions(
       break;
 }
 
-size_t TraceIntelPT::GetInstructionCount(Thread &thread) {
+Optional<size_t> TraceIntelPT::GetInstructionCount(Thread &thread) {
   if (const DecodedThread *decoded_thread = Decode(thread))
     return decoded_thread->GetInstructions().size();
   else
-    return 0;
+    return None;
 }
 
 Expected<pt_cpu> TraceIntelPT::GetCPUInfoForLiveProcess() {

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
index f19e516498e9..52b336777555 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
@@ -70,7 +70,7 @@ class TraceIntelPT : public Trace {
       std::function<bool(size_t index, llvm::Expected<lldb::addr_t> load_addr)>
           callback) override;
 
-  size_t GetInstructionCount(Thread &thread) override;
+  llvm::Optional<size_t> GetInstructionCount(Thread &thread) override;
 
   size_t GetCursorPosition(Thread &thread) override;
 

diff  --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp
index 7f58dbc70039..451f89126001 100644
--- a/lldb/source/Target/Trace.cpp
+++ b/lldb/source/Target/Trace.cpp
@@ -13,6 +13,7 @@
 #include "lldb/Core/Module.h"
 #include "lldb/Core/PluginManager.h"
 #include "lldb/Symbol/Function.h"
+#include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/SectionLoadList.h"
 #include "lldb/Target/Thread.h"
@@ -101,189 +102,255 @@ static int GetNumberOfDigits(size_t num) {
   return num == 0 ? 1 : static_cast<int>(log10(num)) + 1;
 }
 
-/// Dump the symbol context of the given instruction address if it's 
diff erent
-/// from the symbol context of the previous instruction in the trace.
-///
-/// \param[in] prev_sc
-///     The symbol context of the previous instruction in the trace.
-///
-/// \param[in] address
-///     The address whose symbol information will be dumped.
-///
 /// \return
-///     The symbol context of the current address, which might 
diff er from the
-///     previous one.
-static SymbolContext DumpSymbolContext(Stream &s, const SymbolContext &prev_sc,
-                                       Target &target, const Address &address) {
-  AddressRange range;
-  if (prev_sc.GetAddressRange(eSymbolContextEverything, 0,
-                              /*inline_block_range*/ false, range) &&
-      range.ContainsFileAddress(address))
-    return prev_sc;
+///     \b true if the provided line entries match line, column and source file.
+///     This function assumes that the line entries are valid.
+static bool FileLineAndColumnMatches(const LineEntry &a, const LineEntry &b) {
+  if (a.line != b.line)
+    return false;
+  if (a.column != b.column)
+    return false;
+  return a.file == b.file;
+}
+
+// This custom LineEntry validator is neded because some line_entries have
+// 0 as line, which is meaningless. Notice that LineEntry::IsValid only
+// checks that line is not LLDB_INVALID_LINE_NUMBER, i.e. UINT32_MAX.
+static bool IsLineEntryValid(const LineEntry &line_entry) {
+  return line_entry.IsValid() && line_entry.line > 0;
+}
 
+/// Helper structure for \a TraverseInstructionsWithSymbolInfo.
+struct InstructionSymbolInfo {
   SymbolContext sc;
-  address.CalculateSymbolContext(&sc, eSymbolContextEverything);
+  Address address;
+  lldb::addr_t load_address;
+  lldb::DisassemblerSP disassembler;
+  lldb::InstructionSP instruction;
+  lldb_private::ExecutionContext exe_ctx;
+};
 
-  if (!prev_sc.module_sp && !sc.module_sp)
-    return sc;
-  if (prev_sc.module_sp == sc.module_sp && !sc.function && !sc.symbol &&
-      !prev_sc.function && !prev_sc.symbol)
+/// InstructionSymbolInfo object with symbol information for the given
+/// instruction, calculated efficiently.
+///
+/// \param[in] symbol_scope
+///     If not \b 0, then the \a InstructionSymbolInfo will have its
+///     SymbolContext calculated up to that level.
+///
+/// \param[in] include_disassembler
+///     If \b true, then the \a InstructionSymbolInfo will have the
+///     \a disassembler and \a instruction objects calculated.
+static void TraverseInstructionsWithSymbolInfo(
+    Trace &trace, Thread &thread, size_t position,
+    Trace::TraceDirection direction, SymbolContextItem symbol_scope,
+    bool include_disassembler,
+    std::function<bool(size_t index, Expected<InstructionSymbolInfo> insn)>
+        callback) {
+  InstructionSymbolInfo prev_insn;
+
+  Target &target = thread.GetProcess()->GetTarget();
+  ExecutionContext exe_ctx;
+  target.CalculateExecutionContext(exe_ctx);
+  const ArchSpec &arch = target.GetArchitecture();
+
+  // Find the symbol context for the given address reusing the previous
+  // instruction's symbol context when possible.
+  auto calculate_symbol_context = [&](const Address &address) {
+    AddressRange range;
+    if (prev_insn.sc.GetAddressRange(symbol_scope, 0,
+                                     /*inline_block_range*/ false, range) &&
+        range.Contains(address))
+      return prev_insn.sc;
+
+    SymbolContext sc;
+    address.CalculateSymbolContext(&sc, symbol_scope);
     return sc;
+  };
 
-  s.Printf("  ");
+  // Find the disassembler for the given address reusing the previous
+  // instruction's disassembler when possible.
+  auto calculate_disass = [&](const Address &address, const SymbolContext &sc) {
+    if (prev_insn.disassembler) {
+      if (InstructionSP instruction =
+              prev_insn.disassembler->GetInstructionList()
+                  .GetInstructionAtAddress(address))
+        return std::make_tuple(prev_insn.disassembler, instruction);
+    }
+
+    if (sc.function) {
+      if (DisassemblerSP disassembler =
+              sc.function->GetInstructions(exe_ctx, nullptr)) {
+        if (InstructionSP instruction =
+                disassembler->GetInstructionList().GetInstructionAtAddress(
+                    address))
+          return std::make_tuple(disassembler, instruction);
+      }
+    }
+    // We fallback to a single instruction disassembler
+    AddressRange range(address, arch.GetMaximumOpcodeByteSize());
+    DisassemblerSP disassembler =
+        Disassembler::DisassembleRange(arch, /*plugin_name*/ nullptr,
+                                       /*flavor*/ nullptr, target, range);
+    return std::make_tuple(disassembler,
+                           disassembler ? disassembler->GetInstructionList()
+                                              .GetInstructionAtAddress(address)
+                                        : InstructionSP());
+  };
 
-  if (!sc.module_sp)
-    s.Printf("(none)");
-  else if (!sc.function && !sc.symbol)
-    s.Printf("%s`(none)",
-             sc.module_sp->GetFileSpec().GetFilename().AsCString());
-  else
-    sc.DumpStopContext(&s, &target, address, /*show_fullpath*/ false,
-                       /*show_module*/ true, /*show_inlined_frames*/ false,
-                       /*show_function_arguments*/ true,
-                       /*show_function_name*/ true);
-  s.Printf("\n");
-  return sc;
+  trace.TraverseInstructions(
+      thread, position, direction,
+      [&](size_t index, Expected<lldb::addr_t> load_address) -> bool {
+        if (!load_address)
+          return callback(index, load_address.takeError());
+
+        InstructionSymbolInfo insn;
+        insn.load_address = *load_address;
+        insn.exe_ctx = exe_ctx;
+        insn.address.SetLoadAddress(*load_address, &target);
+        if (symbol_scope != 0)
+          insn.sc = calculate_symbol_context(insn.address);
+        if (include_disassembler)
+          std::tie(insn.disassembler, insn.instruction) =
+              calculate_disass(insn.address, insn.sc);
+        prev_insn = insn;
+        return callback(index, insn);
+      });
 }
 
-/// Dump an instruction given by its address using a given disassembler, unless
-/// the instruction is not present in the disassembler.
-///
-/// \param[in] disassembler
-///     A disassembler containing a certain instruction list.
-///
-/// \param[in] address
-///     The address of the instruction to dump.
+/// Compare the symbol contexts of the provided \a InstructionSymbolInfo
+/// objects.
 ///
 /// \return
-///     \b true if the information could be dumped, \b false otherwise.
-static bool TryDumpInstructionInfo(Stream &s,
-                                   const DisassemblerSP &disassembler,
-                                   const ExecutionContext &exe_ctx,
-                                   const Address &address) {
-  if (!disassembler)
+///     \a true if both instructions belong to the same scope level analized
+///     in the following order:
+///       - module
+///       - symbol
+///       - function
+///       - line
+static bool
+IsSameInstructionSymbolContext(const InstructionSymbolInfo &prev_insn,
+                               const InstructionSymbolInfo &insn) {
+  // module checks
+  if (insn.sc.module_sp != prev_insn.sc.module_sp)
+    return false;
+
+  // symbol checks
+  if (insn.sc.symbol != prev_insn.sc.symbol)
     return false;
 
-  if (InstructionSP instruction =
-          disassembler->GetInstructionList().GetInstructionAtAddress(address)) {
-    instruction->Dump(&s, /*show_address*/ false, /*show_bytes*/ false,
-                      /*max_opcode_byte_size*/ 0, &exe_ctx,
-                      /*sym_ctx*/ nullptr, /*prev_sym_ctx*/ nullptr,
-                      /*disassembly_addr_format*/ nullptr,
-                      /*max_address_text_size*/ 0);
+  // function checks
+  if (!insn.sc.function && !prev_insn.sc.function)
     return true;
-  }
+  else if (insn.sc.function != prev_insn.sc.function)
+    return false;
 
-  return false;
+  // line entry checks
+  const bool curr_line_valid = IsLineEntryValid(insn.sc.line_entry);
+  const bool prev_line_valid = IsLineEntryValid(prev_insn.sc.line_entry);
+  if (curr_line_valid && prev_line_valid)
+    return FileLineAndColumnMatches(insn.sc.line_entry,
+                                    prev_insn.sc.line_entry);
+  return curr_line_valid == prev_line_valid;
 }
 
-/// Dump an instruction instruction given by its address.
+/// Dump the symbol context of the given instruction address if it's 
diff erent
+/// from the symbol context of the previous instruction in the trace.
 ///
-/// \param[in] prev_disassembler
-///     The disassembler that was used to dump the previous instruction in the
-///     trace. It is useful to avoid recomputations.
+/// \param[in] prev_sc
+///     The symbol context of the previous instruction in the trace.
 ///
 /// \param[in] address
-///     The address of the instruction to dump.
+///     The address whose symbol information will be dumped.
 ///
 /// \return
-///     A disassembler that contains the given instruction, which might 
diff er
-///     from the previous disassembler.
-static DisassemblerSP
-DumpInstructionInfo(Stream &s, const SymbolContext &sc,
-                    const DisassemblerSP &prev_disassembler,
-                    ExecutionContext &exe_ctx, const Address &address) {
-  // We first try to use the previous disassembler
-  if (TryDumpInstructionInfo(s, prev_disassembler, exe_ctx, address))
-    return prev_disassembler;
-
-  // Now we try using the current function's disassembler
-  if (sc.function) {
-    DisassemblerSP disassembler =
-        sc.function->GetInstructions(exe_ctx, nullptr);
-    if (TryDumpInstructionInfo(s, disassembler, exe_ctx, address))
-      return disassembler;
-  }
+///     The symbol context of the current address, which might 
diff er from the
+///     previous one.
+static void
+DumpInstructionSymbolContext(Stream &s,
+                             Optional<InstructionSymbolInfo> prev_insn,
+                             InstructionSymbolInfo &insn) {
+  if (prev_insn && IsSameInstructionSymbolContext(*prev_insn, insn))
+    return;
 
-  // We fallback to disassembly one instruction
-  Target &target = exe_ctx.GetTargetRef();
-  const ArchSpec &arch = target.GetArchitecture();
-  AddressRange range(address, arch.GetMaximumOpcodeByteSize() * 1);
-  DisassemblerSP disassembler =
-      Disassembler::DisassembleRange(arch, /*plugin_name*/ nullptr,
-                                     /*flavor*/ nullptr, target, range);
-  if (TryDumpInstructionInfo(s, disassembler, exe_ctx, address))
-    return disassembler;
-  return nullptr;
+  s.Printf("  ");
+
+  if (!insn.sc.module_sp)
+    s.Printf("(none)");
+  else if (!insn.sc.function && !insn.sc.symbol)
+    s.Printf("%s`(none)",
+             insn.sc.module_sp->GetFileSpec().GetFilename().AsCString());
+  else
+    insn.sc.DumpStopContext(&s, insn.exe_ctx.GetTargetPtr(), insn.address,
+                            /*show_fullpath*/ false,
+                            /*show_module*/ true, /*show_inlined_frames*/ false,
+                            /*show_function_arguments*/ true,
+                            /*show_function_name*/ true);
+  s.Printf("\n");
+}
+
+static void DumpInstructionDisassembly(Stream &s, InstructionSymbolInfo &insn) {
+  if (!insn.instruction)
+    return;
+  insn.instruction->Dump(&s, /*show_address*/ false, /*show_bytes*/ false,
+                         /*max_opcode_byte_size*/ 0, &insn.exe_ctx, &insn.sc,
+                         /*prev_sym_ctx*/ nullptr,
+                         /*disassembly_addr_format*/ nullptr,
+                         /*max_address_text_size*/ 0);
 }
 
 void Trace::DumpTraceInstructions(Thread &thread, Stream &s, size_t count,
                                   size_t end_position, bool raw) {
-  if (!IsTraced(thread)) {
+  Optional<size_t> instructions_count = GetInstructionCount(thread);
+  if (!instructions_count) {
     s.Printf("thread #%u: tid = %" PRIu64 ", not traced\n", thread.GetIndexID(),
              thread.GetID());
     return;
   }
 
-  size_t instructions_count = GetInstructionCount(thread);
   s.Printf("thread #%u: tid = %" PRIu64 ", total instructions = %zu\n",
-           thread.GetIndexID(), thread.GetID(), instructions_count);
+           thread.GetIndexID(), thread.GetID(), *instructions_count);
 
-  if (count == 0 || end_position >= instructions_count)
+  if (count == 0 || end_position >= *instructions_count)
     return;
 
+  int digits_count = GetNumberOfDigits(end_position);
   size_t start_position =
       end_position + 1 < count ? 0 : end_position + 1 - count;
-
-  int digits_count = GetNumberOfDigits(end_position);
   auto printInstructionIndex = [&](size_t index) {
     s.Printf("    [%*zu] ", digits_count, index);
   };
 
   bool was_prev_instruction_an_error = false;
-  Target &target = thread.GetProcess()->GetTarget();
+  Optional<InstructionSymbolInfo> prev_insn;
 
-  SymbolContext sc;
-  DisassemblerSP disassembler;
-  ExecutionContext exe_ctx;
-  target.CalculateExecutionContext(exe_ctx);
+  TraverseInstructionsWithSymbolInfo(
+      *this, thread, start_position, TraceDirection::Forwards,
+      eSymbolContextEverything, /*disassembler*/ true,
+      [&](size_t index, Expected<InstructionSymbolInfo> insn) -> bool {
+        if (!insn) {
+          printInstructionIndex(index);
+          s << toString(insn.takeError());
 
-  TraverseInstructions(
-      thread, start_position, TraceDirection::Forwards,
-      [&](size_t index, Expected<lldb::addr_t> load_address) -> bool {
-        if (load_address) {
-          // We print an empty line after a sequence of errors to show more
-          // clearly that there's a gap in the trace
+          prev_insn = None;
+          was_prev_instruction_an_error = true;
+        } else {
           if (was_prev_instruction_an_error)
             s.Printf("    ...missing instructions\n");
 
-          Address address;
-          if (!raw) {
-            target.GetSectionLoadList().ResolveLoadAddress(*load_address,
-                                                           address);
-
-            sc = DumpSymbolContext(s, sc, target, address);
-          }
+          if (!raw)
+            DumpInstructionSymbolContext(s, prev_insn, *insn);
 
           printInstructionIndex(index);
-          s.Printf("0x%016" PRIx64 "    ", *load_address);
+          s.Printf("0x%016" PRIx64 "    ", insn->load_address);
 
-          if (!raw) {
-            disassembler =
-                DumpInstructionInfo(s, sc, disassembler, exe_ctx, address);
-          }
+          if (!raw)
+            DumpInstructionDisassembly(s, *insn);
 
+          prev_insn = *insn;
           was_prev_instruction_an_error = false;
-        } else {
-          printInstructionIndex(index);
-          s << toString(load_address.takeError());
-          was_prev_instruction_an_error = true;
-          if (!raw)
-            sc = SymbolContext();
         }
 
         s.Printf("\n");
-
         return index < end_position;
       });
 }

diff  --git a/lldb/test/API/commands/trace/TestTraceStartStop.py b/lldb/test/API/commands/trace/TestTraceStartStop.py
index ad301a575b8f..7b40c67729f8 100644
--- a/lldb/test/API/commands/trace/TestTraceStartStop.py
+++ b/lldb/test/API/commands/trace/TestTraceStartStop.py
@@ -100,7 +100,6 @@ def testStartStopLiveThreads(self):
   a.out`main \+ 11 at main.cpp:4
     \[1\] {ADDRESS_REGEX}    movl .*
     \[2\] {ADDRESS_REGEX}    jmp  .* ; <\+28> at main.cpp:4
-  a.out`main \+ 28 at main.cpp:4
     \[3\] {ADDRESS_REGEX}    cmpl .*
     \[4\] {ADDRESS_REGEX}    jle  .* ; <\+20> at main.cpp:5'''])
 


        


More information about the lldb-commits mailing list