[Lldb-commits] [lldb] ca922a3 - [intelpt] Refactor timestamps out of `IntelPTInstruction`

Alisamar Husain via lldb-commits lldb-commits at lists.llvm.org
Fri Apr 1 09:22:09 PDT 2022


Author: Alisamar Husain
Date: 2022-04-01T21:51:42+05:30
New Revision: ca922a3559d7c537fefbf023b6c9fd0f8ff0ed9d

URL: https://github.com/llvm/llvm-project/commit/ca922a3559d7c537fefbf023b6c9fd0f8ff0ed9d
DIFF: https://github.com/llvm/llvm-project/commit/ca922a3559d7c537fefbf023b6c9fd0f8ff0ed9d.diff

LOG: [intelpt] Refactor timestamps out of `IntelPTInstruction`
Storing timestamps (TSCs) in a more efficient map at the decoded thread level to speed up TSC lookup, as well as reduce the amount of memory used by each decoded instruction. Also introduced TSC range which keeps the current timestamp valid for all subsequent instructions until the next timestamp is emitted.

Differential Revision: https://reviews.llvm.org/D122603

Added: 
    

Modified: 
    lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
    lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
    lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp
    lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h
    lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
    lldb/test/API/commands/trace/TestTraceDumpInfo.py
    lldb/test/API/commands/trace/TestTraceLoad.py

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
index 3c39c4d9a96d3..4ef689499188f 100644
--- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
@@ -49,10 +49,6 @@ size_t IntelPTInstruction::GetMemoryUsage() {
   return sizeof(IntelPTInstruction);
 }
 
-Optional<uint64_t> IntelPTInstruction::GetTimestampCounter() const {
-  return m_timestamp;
-}
-
 Optional<size_t> DecodedThread::GetRawTraceSize() const {
   return m_raw_trace_size;
 }
@@ -90,6 +86,23 @@ IntelPTInstruction::GetControlFlowType(lldb::addr_t next_load_address) const {
 
 ThreadSP DecodedThread::GetThread() { return m_thread_sp; }
 
+void DecodedThread::AppendInstruction(const pt_insn &insn) {
+  m_instructions.emplace_back(insn);
+}
+
+void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
+  m_instructions.emplace_back(insn);
+  if (!m_last_tsc || *m_last_tsc != tsc) {
+    // In case the first instructions are errors or did not have a TSC, we'll
+    // get a first valid TSC not in position 0. We can safely force these error
+    // instructions to use the first valid TSC, so that all the trace has TSCs.
+    size_t start_index =
+        m_instruction_timestamps.empty() ? 0 : m_instructions.size() - 1;
+    m_instruction_timestamps.emplace(start_index, tsc);
+    m_last_tsc = tsc;
+  }
+}
+
 void DecodedThread::AppendError(llvm::Error &&error) {
   m_errors.try_emplace(m_instructions.size(), toString(std::move(error)));
   m_instructions.emplace_back();
@@ -99,8 +112,21 @@ ArrayRef<IntelPTInstruction> DecodedThread::GetInstructions() const {
   return makeArrayRef(m_instructions);
 }
 
-const char *DecodedThread::GetErrorByInstructionIndex(uint64_t idx) {
-  auto it = m_errors.find(idx);
+Optional<DecodedThread::TscRange>
+DecodedThread::CalculateTscRange(size_t insn_index) const {
+  auto it = m_instruction_timestamps.upper_bound(insn_index);
+  if (it == m_instruction_timestamps.begin())
+    return None;
+
+  return TscRange(--it, *this);
+}
+
+bool DecodedThread::IsInstructionAnError(size_t insn_idx) const {
+  return m_instructions[insn_idx].IsError();
+}
+
+const char *DecodedThread::GetErrorByInstructionIndex(size_t insn_idx) {
+  auto it = m_errors.find(insn_idx);
   if (it == m_errors.end())
     return nullptr;
 
@@ -125,7 +151,47 @@ lldb::TraceCursorUP DecodedThread::GetCursor() {
 }
 
 size_t DecodedThread::CalculateApproximateMemoryUsage() const {
-  return m_raw_trace_size.getValueOr(0) +
-         IntelPTInstruction::GetMemoryUsage() * m_instructions.size() +
+  return IntelPTInstruction::GetMemoryUsage() * m_instructions.size() +
          m_errors.getMemorySize();
 }
+
+DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it,
+                                  const DecodedThread &decoded_thread)
+    : m_it(it), m_decoded_thread(&decoded_thread) {
+  auto next_it = m_it;
+  ++next_it;
+  m_end_index = (next_it == m_decoded_thread->m_instruction_timestamps.end())
+                    ? m_decoded_thread->GetInstructions().size() - 1
+                    : next_it->first - 1;
+}
+
+size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; }
+
+size_t DecodedThread::TscRange::GetStartInstructionIndex() const {
+  return m_it->first;
+}
+
+size_t DecodedThread::TscRange::GetEndInstructionIndex() const {
+  return m_end_index;
+}
+
+bool DecodedThread::TscRange::InRange(size_t insn_index) {
+  return GetStartInstructionIndex() <= insn_index &&
+         insn_index <= GetEndInstructionIndex();
+}
+
+Optional<DecodedThread::TscRange> DecodedThread::TscRange::Next() {
+  auto next_it = m_it;
+  ++next_it;
+  if (next_it == m_decoded_thread->m_instruction_timestamps.end())
+    return None;
+  return TscRange(next_it, *m_decoded_thread);
+}
+
+Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() {
+  if (m_it == m_decoded_thread->m_instruction_timestamps.begin())
+    return None;
+  auto prev_it = m_it;
+  --prev_it;
+  return TscRange(prev_it, *m_decoded_thread);
+}
\ No newline at end of file

diff  --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
index bda5d046c3a0d..acb67738f3a7e 100644
--- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
+++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
@@ -62,9 +62,6 @@ class IntelPTError : public llvm::ErrorInfo<IntelPTError> {
 /// As mentioned, any gap is represented as an error in this class.
 class IntelPTInstruction {
 public:
-  IntelPTInstruction(const pt_insn &pt_insn, uint64_t timestamp)
-      : m_pt_insn(pt_insn), m_timestamp(timestamp), m_is_error(false) {}
-
   IntelPTInstruction(const pt_insn &pt_insn)
       : m_pt_insn(pt_insn), m_is_error(false) {}
 
@@ -85,13 +82,6 @@ class IntelPTInstruction {
   /// Get the size in bytes of an instance of this class
   static size_t GetMemoryUsage();
 
-  /// Get the timestamp associated with the current instruction. The timestamp
-  /// is similar to what a rdtsc instruction would return.
-  ///
-  /// \return
-  ///     The timestamp or \b llvm::None if not available.
-  llvm::Optional<uint64_t> GetTimestampCounter() const;
-
   /// Get the \a lldb::TraceInstructionControlFlowType categories of the
   /// instruction.
   ///
@@ -111,9 +101,8 @@ class IntelPTInstruction {
   const IntelPTInstruction &operator=(const IntelPTInstruction &other) = delete;
 
   // When adding new members to this class, make sure to update
-  // IntelPTInstruction::GetNonErrorMemoryUsage() if needed.
+  // IntelPTInstruction::GetMemoryUsage() if needed.
   pt_insn m_pt_insn;
-  llvm::Optional<uint64_t> m_timestamp;
   bool m_is_error;
 };
 
@@ -126,11 +115,60 @@ class IntelPTInstruction {
 /// stopped at. See \a Trace::GetCursorPosition for more information.
 class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
 public:
+  /// \class TscRange
+  /// Class that represents the trace range associated with a given TSC.
+  /// It provides efficient iteration to the previous or next TSC range in the
+  /// decoded trace.
+  ///
+  /// TSC timestamps are emitted by the decoder infrequently, which means
+  /// that each TSC covers a range of instruction indices, which can be used to
+  /// speed up TSC lookups.
+  class TscRange {
+  public:
+    /// Check if this TSC range includes the given instruction index.
+    bool InRange(size_t insn_index);
+
+    /// Get the next range chronologically.
+    llvm::Optional<TscRange> Next();
+
+    /// Get the previous range chronologically.
+    llvm::Optional<TscRange> Prev();
+
+    /// Get the TSC value.
+    size_t GetTsc() const;
+    /// Get the smallest instruction index that has this TSC.
+    size_t GetStartInstructionIndex() const;
+    /// Get the largest instruction index that has this TSC.
+    size_t GetEndInstructionIndex() const;
+
+  private:
+    friend class DecodedThread;
+
+    TscRange(std::map<size_t, uint64_t>::const_iterator it,
+             const DecodedThread &decoded_thread);
+
+    /// The iterator pointing to the beginning of the range.
+    std::map<size_t, uint64_t>::const_iterator m_it;
+    /// The largest instruction index that has this TSC.
+    size_t m_end_index;
+
+    const DecodedThread *m_decoded_thread;
+  };
+
   DecodedThread(lldb::ThreadSP thread_sp);
 
   /// Utility constructor that initializes the trace with a provided error.
   DecodedThread(lldb::ThreadSP thread_sp, llvm::Error &&err);
 
+  /// Append a successfully decoded instruction.
+  void AppendInstruction(const pt_insn &instruction);
+
+  /// Append a sucessfully decoded instruction with an associated TSC timestamp.
+  void AppendInstruction(const pt_insn &instruction, uint64_t tsc);
+
+  /// Append a decoding error (i.e. an instruction that failed to be decoded).
+  void AppendError(llvm::Error &&error);
+
   /// Get the instructions from the decoded trace. Some of them might indicate
   /// errors (i.e. gaps) in the trace. For an instruction error, you can access
   /// its underlying error message with the \a GetErrorByInstructionIndex()
@@ -140,20 +178,22 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   ///   The instructions of the trace.
   llvm::ArrayRef<IntelPTInstruction> GetInstructions() const;
 
+  /// Construct the TSC range that covers the given instruction index.
+  /// This operation is O(logn) and should be used sparingly.
+  /// If the trace was collected with TSC support, all the instructions of
+  /// the trace will have associated TSCs. This means that this method will
+  /// only return \b llvm::None if there are no TSCs whatsoever in the trace.
+  llvm::Optional<TscRange> CalculateTscRange(size_t insn_index) const;
+
+  /// Check if an instruction given by its index is an error.
+  bool IsInstructionAnError(size_t insn_idx) const;
+
   /// Get the error associated with a given instruction index.
   ///
   /// \return
   ///   The error message of \b nullptr if the given index
   ///   points to a valid instruction.
-  const char *GetErrorByInstructionIndex(uint64_t ins_idx);
-
-  /// Append a successfully decoded instruction.
-  template <typename... Ts> void AppendInstruction(Ts... instruction_args) {
-    m_instructions.emplace_back(instruction_args...);
-  }
-
-  /// Append a decoding error (i.e. an instruction that failed to be decoded).
-  void AppendError(llvm::Error &&error);
+  const char *GetErrorByInstructionIndex(size_t ins_idx);
 
   /// Get a new cursor for the decoded thread.
   lldb::TraceCursorUP GetCursor();
@@ -177,8 +217,23 @@ class DecodedThread : public std::enable_shared_from_this<DecodedThread> {
   /// When adding new members to this class, make sure
   /// to update \a CalculateApproximateMemoryUsage() accordingly.
   lldb::ThreadSP m_thread_sp;
+  /// The low level storage of all instruction addresses. Each instruction has
+  /// an index in this vector and it will be used in other parts of the code.
   std::vector<IntelPTInstruction> m_instructions;
+  /// This map contains the TSCs of the decoded instructions. It maps
+  /// `instruction index -> TSC`, where `instruction index` is the first index
+  /// at which the mapped TSC appears. We use this representation because TSCs
+  /// are sporadic and we can think of them as ranges. If TSCs are present in
+  /// the trace, all instructions will have an associated TSC, including the
+  /// first one. Otherwise, this map will be empty.
+  std::map<size_t, uint64_t> m_instruction_timestamps;
+  /// This is the chronologically last TSC that has been added.
+  llvm::Optional<uint64_t> m_last_tsc = llvm::None;
+  // This variables stores the messages of all the error instructions in the
+  // trace. It maps `instruction index -> error message`.
   llvm::DenseMap<uint64_t, std::string> m_errors;
+  /// The size in bytes of the raw buffer before decoding. It might be None if
+  /// the decoding failed.
   llvm::Optional<size_t> m_raw_trace_size;
 };
 

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp
index b335e5e4774f7..59b0cc1865d1b 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp
@@ -23,6 +23,7 @@ TraceCursorIntelPT::TraceCursorIntelPT(ThreadSP thread_sp,
   assert(!m_decoded_thread_sp->GetInstructions().empty() &&
          "a trace should have at least one instruction or error");
   m_pos = m_decoded_thread_sp->GetInstructions().size() - 1;
+  m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos);
 }
 
 size_t TraceCursorIntelPT::GetInternalInstructionSize() {
@@ -40,6 +41,10 @@ bool TraceCursorIntelPT::Next() {
 
   while (canMoveOne()) {
     m_pos += IsForwards() ? 1 : -1;
+
+    if (m_tsc_range && !m_tsc_range->InRange(m_pos))
+      m_tsc_range = IsForwards() ? m_tsc_range->Next() : m_tsc_range->Prev();
+
     if (!m_ignore_errors && IsError())
       return true;
     if (GetInstructionControlFlowType() & m_granularity)
@@ -58,23 +63,29 @@ size_t TraceCursorIntelPT::Seek(int64_t offset, SeekType origin) {
     return std::min(std::max((int64_t)0, raw_pos), last_index);
   };
 
-  switch (origin) {
-  case TraceCursor::SeekType::Set:
-    m_pos = fitPosToBounds(offset);
-    return m_pos;
-  case TraceCursor::SeekType::End:
-    m_pos = fitPosToBounds(offset + last_index);
-    return last_index - m_pos;
-  case TraceCursor::SeekType::Current:
-    int64_t new_pos = fitPosToBounds(offset + m_pos);
-    int64_t dist = m_pos - new_pos;
-    m_pos = new_pos;
-    return std::abs(dist);
-  }
+  auto FindDistanceAndSetPos = [&]() -> int64_t {
+    switch (origin) {
+    case TraceCursor::SeekType::Set:
+      m_pos = fitPosToBounds(offset);
+      return m_pos;
+    case TraceCursor::SeekType::End:
+      m_pos = fitPosToBounds(offset + last_index);
+      return last_index - m_pos;
+    case TraceCursor::SeekType::Current:
+      int64_t new_pos = fitPosToBounds(offset + m_pos);
+      int64_t dist = m_pos - new_pos;
+      m_pos = new_pos;
+      return std::abs(dist);
+    }
+  };
+  
+	int64_t dist = FindDistanceAndSetPos();
+  m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos);
+  return dist;
 }
 
 bool TraceCursorIntelPT::IsError() {
-  return m_decoded_thread_sp->GetInstructions()[m_pos].IsError();
+  return m_decoded_thread_sp->IsInstructionAnError(m_pos);
 }
 
 const char *TraceCursorIntelPT::GetError() {
@@ -85,10 +96,14 @@ lldb::addr_t TraceCursorIntelPT::GetLoadAddress() {
   return m_decoded_thread_sp->GetInstructions()[m_pos].GetLoadAddress();
 }
 
-Optional<uint64_t> TraceCursorIntelPT::GetCounter(lldb::TraceCounter counter_type) {
+Optional<uint64_t>
+TraceCursorIntelPT::GetCounter(lldb::TraceCounter counter_type) {
   switch (counter_type) {
-    case lldb::eTraceCounterTSC:
-      return m_decoded_thread_sp->GetInstructions()[m_pos].GetTimestampCounter();
+  case lldb::eTraceCounterTSC:
+    if (m_tsc_range)
+      return m_tsc_range->GetTsc();
+    else
+      return llvm::None;
   }
 }
 

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h
index cda524b98c69f..ef40089f172fa 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h
@@ -42,6 +42,8 @@ class TraceCursorIntelPT : public TraceCursor {
   DecodedThreadSP m_decoded_thread_sp;
   /// Internal instruction index currently pointing at.
   size_t m_pos;
+  /// Tsc range covering the current instruction.
+  llvm::Optional<DecodedThread::TscRange> m_tsc_range;
 };
 
 } // namespace trace_intel_pt

diff  --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
index 8ba6c1c51db72..d6e05ce42907e 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -119,8 +119,9 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) {
   s.Printf("  Total number of instructions: %zu\n", insn_len);
   s.Printf("  Total approximate memory usage: %0.2lf KiB\n",
            (double)mem_used / 1024);
-  s.Printf("  Average memory usage per instruction: %zu bytes\n",
-           mem_used / insn_len);
+  if (insn_len != 0)
+    s.Printf("  Average memory usage per instruction: %0.2lf bytes\n",
+             (double)mem_used / insn_len);
   return;
 }
 

diff  --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
index a90a28cf7fea1..89c1fad7fef8d 100644
--- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py
+++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
@@ -40,5 +40,5 @@ def testDumpRawTraceSize(self):
 thread #1: tid = 3842849
   Raw trace size: 4 KiB
   Total number of instructions: 21
-  Total approximate memory usage: 5.31 KiB
-  Average memory usage per instruction: 259 bytes'''])
+  Total approximate memory usage: 0.98 KiB
+  Average memory usage per instruction: 48.00 bytes'''])

diff  --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py
index a68aa71bcedfe..95f57b4018276 100644
--- a/lldb/test/API/commands/trace/TestTraceLoad.py
+++ b/lldb/test/API/commands/trace/TestTraceLoad.py
@@ -38,8 +38,8 @@ def testLoadTrace(self):
 thread #1: tid = 3842849
   Raw trace size: 4 KiB
   Total number of instructions: 21
-  Total approximate memory usage: 5.31 KiB
-  Average memory usage per instruction: 259 bytes'''])
+  Total approximate memory usage: 0.98 KiB
+  Average memory usage per instruction: 48.00 bytes'''])
 
     def testLoadInvalidTraces(self):
         src_dir = self.getSourceDir()


        


More information about the lldb-commits mailing list