[Lldb-commits] [lldb] 1e5083a - [trace][intel pt] Handle better tsc in the decoder

Sat Apr 2 11:07:16 PDT 2022

Author: Walter Erquinigo
Date: 2022-04-02T11:06:26-07:00
New Revision: 1e5083a563f8aca294feda60dcb4a814623ba321

URL: https://github.com/llvm/llvm-project/commit/1e5083a563f8aca294feda60dcb4a814623ba321
DIFF: https://github.com/llvm/llvm-project/commit/1e5083a563f8aca294feda60dcb4a814623ba321.diff

LOG: [trace][intel pt] Handle better tsc in the decoder

A problem that I introduced in the decoder is that I was considering TSC decoding
errors as actual instruction errors, which mean that the trace has a gap. This is
wrong because a TSC decoding error doesn't mean that there's a gap in the trace.
Instead, now I'm just counting how many of these errors happened and I'm using
the `dump info` command to check for this number.

Besides that, I refactored the decoder a little bit to make it simpler, more
readable, and to handle TSCs in a cleaner way.

Differential Revision: https://reviews.llvm.org/D122867

Added: 
    

Modified: 
    lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
    lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
    lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
    lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
    lldb/test/API/commands/trace/TestTraceDumpInfo.py

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
index 4ef689499188f..994d068810f1e 100644

--- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
@@ -86,12 +86,7 @@ IntelPTInstruction::GetControlFlowType(lldb::addr_t next_load_address) const {
 
 ThreadSP DecodedThread::GetThread() { return m_thread_sp; }
 
-void DecodedThread::AppendInstruction(const pt_insn &insn) {
-  m_instructions.emplace_back(insn);
-}
-
-void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
-  m_instructions.emplace_back(insn);
+void DecodedThread::RecordTscForLastInstruction(uint64_t tsc) {
   if (!m_last_tsc || *m_last_tsc != tsc) {
     // In case the first instructions are errors or did not have a TSC, we'll
     // get a first valid TSC not in position 0. We can safely force these error
@@ -103,11 +98,38 @@ void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
   }
 }
 
+void DecodedThread::AppendInstruction(const pt_insn &insn) {
+  m_instructions.emplace_back(insn);
+}
+
+void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
+  AppendInstruction(insn);
+  RecordTscForLastInstruction(tsc);
+}
+
 void DecodedThread::AppendError(llvm::Error &&error) {
   m_errors.try_emplace(m_instructions.size(), toString(std::move(error)));
   m_instructions.emplace_back();
 }
 
+void DecodedThread::AppendError(llvm::Error &&error, uint64_t tsc) {
+  AppendError(std::move(error));
+  RecordTscForLastInstruction(tsc);
+}
+
+void DecodedThread::LibiptErrors::RecordError(int libipt_error_code) {
+  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
+  total_count++;
+}
+
+void DecodedThread::RecordTscError(int libipt_error_code) {
+  m_tsc_errors.RecordError(libipt_error_code);
+}
+
+const DecodedThread::LibiptErrors &DecodedThread::GetTscErrors() const {
+  return m_tsc_errors;
+}
+
 ArrayRef<IntelPTInstruction> DecodedThread::GetInstructions() const {
   return makeArrayRef(m_instructions);
 }
@@ -194,4 +216,4 @@ Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() {
   auto prev_it = m_it;
   --prev_it;
   return TscRange(prev_it, *m_decoded_thread);
-}
\ No newline at end of file
+}

diff  --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
index acb67738f3a7e..030fea0b2fca9 100644
--- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
+++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
@@ -155,6 +155,15 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
     const DecodedThread *m_decoded_thread;
   };
 
+  // Struct holding counts for libipts errors;
+  struct LibiptErrors {
+    // libipt error -> count
+    llvm::DenseMap<const char *, int> libipt_errors;
+    int total_count = 0;
+
+    void RecordError(int libipt_error_code);
+  };
+
   DecodedThread(lldb::ThreadSP thread_sp);
 
   /// Utility constructor that initializes the trace with a provided error.
@@ -195,6 +204,17 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   ///   points to a valid instruction.
   const char *GetErrorByInstructionIndex(size_t ins_idx);
 
+  /// Append a decoding error with a corresponding TSC.
+  void AppendError(llvm::Error &&error, uint64_t TSC);
+
+  /// Record an error decoding a TSC timestamp.
+  ///
+  /// See \a GetTscErrors() for more documentation.
+  ///
+  /// \param[in] libipt_error_code
+  ///   An error returned by the libipt library.
+  void RecordTscError(int libipt_error_code);
+
   /// Get a new cursor for the decoded thread.
   lldb::TraceCursorUP GetCursor();
 
@@ -207,6 +227,14 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   ///   The size of the trace, or \b llvm::None if not available.
   llvm::Optional<size_t> GetRawTraceSize() const;
 
+  /// Return the number of TSC decoding errors that happened. A TSC error
+  /// is not a fatal error and doesn't create gaps in the trace. Instead
+  /// we only keep track of them as a statistic.
+  ///
+  /// \return
+  ///   The number of TSC decoding errors.
+  const LibiptErrors &GetTscErrors() const;
+
   /// The approximate size in bytes used by this instance,
   /// including all the already decoded instructions.
   size_t CalculateApproximateMemoryUsage() const;
@@ -214,6 +242,10 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   lldb::ThreadSP GetThread();
 
 private:
+  /// Notify this class that the last added instruction or error has
+  /// an associated TSC.
+  void RecordTscForLastInstruction(uint64_t tsc);
+
   /// When adding new members to this class, make sure
   /// to update \a CalculateApproximateMemoryUsage() accordingly.
   lldb::ThreadSP m_thread_sp;
@@ -235,6 +267,8 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   /// The size in bytes of the raw buffer before decoding. It might be None if
   /// the decoding failed.
   llvm::Optional<size_t> m_raw_trace_size;
+  /// All occurrences of libipt errors when decoding TSCs.
+  LibiptErrors m_tsc_errors;
 };
 
 using DecodedThreadSP = std::shared_ptr<DecodedThread>;

diff  --git a/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
index 9ca366bb57746..7af56f13cb250 100644
--- a/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
@@ -90,6 +90,59 @@ static int ProcessPTEvents(pt_insn_decoder &decoder, int errcode) {
   return 0;
 }
 
+// Simple struct used by the decoder to keep the state of the most
+// recent TSC and a flag indicating whether TSCs are enabled, not enabled
+// or we just don't yet.
+struct TscInfo {
+  uint64_t tsc = 0;
+  LazyBool has_tsc = eLazyBoolCalculate;
+
+  explicit operator bool() const { return has_tsc == eLazyBoolYes; }
+};
+
+/// Query the decoder for the most recent TSC timestamp and update
+/// tsc_info accordingly.
+void RefreshTscInfo(TscInfo &tsc_info, pt_insn_decoder &decoder,
+                    DecodedThread &decoded_thread) {
+  if (tsc_info.has_tsc == eLazyBoolNo)
+    return;
+
+  uint64_t new_tsc;
+  if (int tsc_error = pt_insn_time(&decoder, &new_tsc, nullptr, nullptr)) {
+    if (tsc_error == -pte_no_time) {
+      // We now know that the trace doesn't support TSC, so we won't try again.
+      // See
+      // https://github.com/intel/libipt/blob/master/doc/man/pt_qry_time.3.md
+      tsc_info.has_tsc = eLazyBoolNo;
+    } else {
+      // We don't add TSC decoding errors in the decoded trace itself to prevent
+      // creating unnecessary gaps, but we can count how many of these errors
+      // happened. In this case we reuse the previous correct TSC we saw, as
+      // it's better than no TSC at all.
+      decoded_thread.RecordTscError(tsc_error);
+    }
+  } else {
+    tsc_info.tsc = new_tsc;
+    tsc_info.has_tsc = eLazyBoolYes;
+  }
+}
+
+static void AppendError(DecodedThread &decoded_thread, Error &&error,
+                        TscInfo &tsc_info) {
+  if (tsc_info)
+    decoded_thread.AppendError(std::move(error), tsc_info.tsc);
+  else
+    decoded_thread.AppendError(std::move(error));
+}
+
+static void AppendInstruction(DecodedThread &decoded_thread,
+                              const pt_insn &insn, TscInfo &tsc_info) {
+  if (tsc_info)
+    decoded_thread.AppendInstruction(insn, tsc_info.tsc);
+  else
+    decoded_thread.AppendInstruction(insn);
+}
+
 /// Decode all the instructions from a configured decoder.
 /// The decoding flow is based on
 /// https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#the-instruction-flow-decode-loop
@@ -98,60 +151,50 @@ static int ProcessPTEvents(pt_insn_decoder &decoder, int errcode) {
 /// Error codes returned by libipt while decoding are:
 /// - negative: actual errors
 /// - positive or zero: not an error, but a list of bits signaling the status of
-/// the decoder
+/// the decoder, e.g. whether there are events that need to be decoded or not
 ///
 /// \param[in] decoder
 ///   A configured libipt \a pt_insn_decoder.
 static void DecodeInstructions(pt_insn_decoder &decoder,
-                               DecodedThreadSP &decoded_thread_sp) {
-  while (true) {
-    int errcode = FindNextSynchronizationPoint(decoder);
-    if (errcode == -pte_eos)
-      break;
+                               DecodedThread &decoded_thread) {
 
-    if (errcode < 0) {
-      decoded_thread_sp->AppendError(make_error<IntelPTError>(errcode));
+  TscInfo tsc_info;
+  // We have this "global" errcode because if it's positive, we'll need
+  // its bits later to process events.
+  int errcode;
+
+  while (true) {
+    if ((errcode = FindNextSynchronizationPoint(decoder)) < 0) {
+      // We signal a gap only if it's not "end of stream"
+      if (errcode != -pte_eos)
+        AppendError(decoded_thread, make_error<IntelPTError>(errcode),
+                    tsc_info);
       break;
     }
 
     // We have synchronized, so we can start decoding
     // instructions and events.
     while (true) {
-      errcode = ProcessPTEvents(decoder, errcode);
-      if (errcode < 0) {
-        decoded_thread_sp->AppendError(make_error<IntelPTError>(errcode));
+      if ((errcode = ProcessPTEvents(decoder, errcode)) < 0) {
+        AppendError(decoded_thread, make_error<IntelPTError>(errcode),
+                    tsc_info);
         break;
       }
 
-      pt_insn insn;
-      errcode = pt_insn_next(&decoder, &insn, sizeof(insn));
-      if (errcode == -pte_eos)
-        break;
-
-      if (errcode < 0) {
-        decoded_thread_sp->AppendError(
-            make_error<IntelPTError>(errcode, insn.ip));
-        break;
-      }
+      // We refresh the TSC that might have changed after processing the events.
+      // See
+      // https://github.com/intel/libipt/blob/master/doc/man/pt_evt_next.3.md
+      RefreshTscInfo(tsc_info, decoder, decoded_thread);
 
-      uint64_t time;
-      int time_error = pt_insn_time(&decoder, &time, nullptr, nullptr);
-      if (time_error == -pte_invalid) {
-        // This happens if we invoke the pt_insn_time method incorrectly,
-        // but the instruction is good though.
-        decoded_thread_sp->AppendError(
-            make_error<IntelPTError>(time_error, insn.ip));
-        decoded_thread_sp->AppendInstruction(insn);
+      pt_insn insn;
+      if ((errcode = pt_insn_next(&decoder, &insn, sizeof(insn))) < 0) {
+        // We signal a gap only if it's not "end of stream"
+        if (errcode != -pte_eos)
+          AppendError(decoded_thread,
+                      make_error<IntelPTError>(errcode, insn.ip), tsc_info);
         break;
       }
-
-      if (time_error == -pte_no_time) {
-        // We simply don't have time information, i.e. None of TSC, MTC or CYC
-        // was enabled.
-        decoded_thread_sp->AppendInstruction(insn);
-      } else {
-        decoded_thread_sp->AppendInstruction(insn, time);
-      }
+      AppendInstruction(decoded_thread, insn, tsc_info);
     }
   }
 }
@@ -202,7 +245,7 @@ static void DecodeInMemoryTrace(DecodedThreadSP &decoded_thread_sp,
   assert(errcode == 0);
   (void)errcode;
 
-  DecodeInstructions(*decoder, decoded_thread_sp);
+  DecodeInstructions(*decoder, *decoded_thread_sp);
   pt_insn_free_decoder(decoder);
 }
 // ---------------------------

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
index d6e05ce42907e..c40b60435ff3d 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -105,24 +105,31 @@ lldb::TraceCursorUP TraceIntelPT::GetCursor(Thread &thread) {
 
 void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) {
   Optional<size_t> raw_size = GetRawTraceSize(thread);
-  s.Printf("\nthread #%u: tid = %" PRIu64, thread.GetIndexID(), thread.GetID());
+  s.Format("\nthread #{0}: tid = {1}", thread.GetIndexID(), thread.GetID());
   if (!raw_size) {
-    s.Printf(", not traced\n");
+    s << ", not traced\n";
     return;
   }
-  s.Printf("\n");
-
-  size_t insn_len = Decode(thread)->GetInstructions().size();
-  size_t mem_used = Decode(thread)->CalculateApproximateMemoryUsage();
-
-  s.Printf("  Raw trace size: %zu KiB\n", *raw_size / 1024);
-  s.Printf("  Total number of instructions: %zu\n", insn_len);
-  s.Printf("  Total approximate memory usage: %0.2lf KiB\n",
+  s << "\n";
+  DecodedThreadSP decoded_trace_sp = Decode(thread);
+  size_t insn_len = decoded_trace_sp->GetInstructions().size();
+  size_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
+
+  s.Format("  Raw trace size: {0} KiB\n", *raw_size / 1024);
+  s.Format("  Total number of instructions: {0}\n", insn_len);
+  s.Format("  Total approximate memory usage: {0:2} KiB\n",
            (double)mem_used / 1024);
   if (insn_len != 0)
-    s.Printf("  Average memory usage per instruction: %0.2lf bytes\n",
+    s.Format("  Average memory usage per instruction: {0:2} bytes\n",
              (double)mem_used / insn_len);
-  return;
+
+  const DecodedThread::LibiptErrors &tsc_errors =
+      decoded_trace_sp->GetTscErrors();
+  s.Format("\n  Number of TSC decoding errors: {0}\n", tsc_errors.total_count);
+  for (const auto &error_message_to_count : tsc_errors.libipt_errors) {
+    s.Format("    {0}: {1}\n", error_message_to_count.first,
+             error_message_to_count.second);
+  }
 }
 
 Optional<size_t> TraceIntelPT::GetRawTraceSize(Thread &thread) {

diff  --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
index 89c1fad7fef8d..74b8eb38f023a 100644
--- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py
+++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
@@ -41,4 +41,6 @@ def testDumpRawTraceSize(self):
   Raw trace size: 4 KiB
   Total number of instructions: 21
   Total approximate memory usage: 0.98 KiB
-  Average memory usage per instruction: 48.00 bytes'''])
+  Average memory usage per instruction: 48.00 bytes
+
+  Number of TSC decoding errors: 0'''])