[Lldb-commits] [lldb] ad884d9 - A few updates around "transcript" (#92843)

Fri May 31 14:42:04 PDT 2024

Author: royitaqi
Date: 2024-05-31T14:42:00-07:00
New Revision: ad884d97288c752ba9088d01cf7ab80b20e4d2a6

URL: https://github.com/llvm/llvm-project/commit/ad884d97288c752ba9088d01cf7ab80b20e4d2a6
DIFF: https://github.com/llvm/llvm-project/commit/ad884d97288c752ba9088d01cf7ab80b20e4d2a6.diff

LOG: A few updates around "transcript" (#92843)

# Changes

1. Changes to the structured transcript.
1. Add fields `commandName` and `commandArguments`. They will hold the
name and the arguments string of the expanded/executed command (e.g.
`breakpoint set` and `-f main.cpp -l 4`). This is not to be confused
with the `command` field, which holds the user input (e.g. `br s -f
main.cpp -l 4`).
2. Add field `timestampInEpochSeconds`. It will hold the timestamp when
the command is executed.
3. Rename field `seconds` to `durationInSeconds`, to improve
readability, especially since `timestampInEpochSeconds` is added.
2. When transcript is available and the newly added option
`--transcript` is present, add the transcript to the output of
`statistics dump`, as a JSON array under a new field `transcript`.
3. A few test name and comment changes.

Added: 
    

Modified: 
    lldb/include/lldb/API/SBCommandInterpreter.h
    lldb/include/lldb/Interpreter/CommandInterpreter.h
    lldb/include/lldb/Target/Statistics.h
    lldb/source/Commands/CommandObjectStats.cpp
    lldb/source/Commands/Options.td
    lldb/source/Interpreter/CommandInterpreter.cpp
    lldb/source/Target/Statistics.cpp
    lldb/test/API/commands/statistics/basic/TestStats.py
    lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h
index 8ac36344b3a79..639309aa32bfc 100644

--- a/lldb/include/lldb/API/SBCommandInterpreter.h
+++ b/lldb/include/lldb/API/SBCommandInterpreter.h
@@ -320,10 +320,17 @@ class SBCommandInterpreter {
 
   /// Returns a list of handled commands, output and error. Each element in
   /// the list is a dictionary with the following keys/values:
-  /// - "command" (string): The command that was executed.
+  /// - "command" (string): The command that was given by the user.
+  /// - "commandName" (string): The name of the executed command.
+  /// - "commandArguments" (string): The arguments of the executed command.
   /// - "output" (string): The output of the command. Empty ("") if no output.
   /// - "error" (string): The error of the command. Empty ("") if no error.
-  /// - "seconds" (float): The time it took to execute the command.
+  /// - "durationInSeconds" (float): The time it took to execute the command.
+  /// - "timestampInEpochSeconds" (int): The timestamp when the command is
+  ///   executed.
+  ///
+  /// Turn on settings `interpreter.save-transcript` for LLDB to populate
+  /// this list. Otherwise this list is empty.
   SBStructuredData GetTranscript();
 
 protected:

diff  --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index ccc30cf4f1a82..8863523b2e31f 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -776,10 +776,14 @@ class CommandInterpreter : public Broadcaster,
 
   /// Contains a list of handled commands and their details. Each element in
   /// the list is a dictionary with the following keys/values:
-  /// - "command" (string): The command that was executed.
+  /// - "command" (string): The command that was given by the user.
+  /// - "commandName" (string): The name of the executed command.
+  /// - "commandArguments" (string): The arguments of the executed command.
   /// - "output" (string): The output of the command. Empty ("") if no output.
   /// - "error" (string): The error of the command. Empty ("") if no error.
-  /// - "seconds" (float): The time it took to execute the command.
+  /// - "durationInSeconds" (float): The time it took to execute the command.
+  /// - "timestampInEpochSeconds" (int): The timestamp when the command is
+  ///   executed.
   ///
   /// Turn on settings `interpreter.save-transcript` for LLDB to populate
   /// this list. Otherwise this list is empty.

diff  --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h
index c4f17b503a1f9..c04d529290fff 100644
--- a/lldb/include/lldb/Target/Statistics.h
+++ b/lldb/include/lldb/Target/Statistics.h
@@ -133,6 +133,7 @@ struct ConstStringStats {
 struct StatisticsOptions {
   bool summary_only = false;
   bool load_all_debug_info = false;
+  bool include_transcript = false;
 };
 
 /// A class that represents statistics for a since lldb_private::Target.

diff  --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp
index a92bb5d1165ee..1935b0fdfadfb 100644
--- a/lldb/source/Commands/CommandObjectStats.cpp
+++ b/lldb/source/Commands/CommandObjectStats.cpp
@@ -81,6 +81,9 @@ class CommandObjectStatsDump : public CommandObjectParsed {
       case 'f':
         m_stats_options.load_all_debug_info = true;
         break;
+      case 't':
+        m_stats_options.include_transcript = true;
+        break;
       default:
         llvm_unreachable("Unimplemented option");
       }

diff  --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 62bbfdc117834..194902abdce49 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -1425,4 +1425,8 @@ let Command = "statistics dump" in {
     Desc<"Dump the total possible debug info statistics. "
     "Force loading all the debug information if not yet loaded, and collect "
     "statistics with those.">;
+  def statistics_dump_transcript: Option<"transcript", "t">, Group<1>,
+    Desc<"If the setting interpreter.save-transcript is enabled and this "
+    "option is specified, include a JSON array with all commands the user and/"
+    "or scripts executed during a debug session.">;
 }

diff  --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 6a61882df093d..acd6294cb3f42 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <chrono>
 #include <cstdlib>
 #include <limits>
 #include <memory>
@@ -1909,6 +1910,11 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
 
     transcript_item = std::make_shared<StructuredData::Dictionary>();
     transcript_item->AddStringItem("command", command_line);
+    transcript_item->AddIntegerItem(
+        "timestampInEpochSeconds",
+        std::chrono::duration_cast<std::chrono::seconds>(
+            std::chrono::system_clock::now().time_since_epoch())
+            .count());
     m_transcript.AddItem(transcript_item);
   }
 
@@ -2056,6 +2062,14 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
         log, "HandleCommand, command line after removing command name(s): '%s'",
         remainder.c_str());
 
+    // To test whether or not transcript should be saved, `transcript_item` is
+    // used instead of `GetSaveTrasncript()`. This is because the latter will
+    // fail when the command is "settings set interpreter.save-transcript true".
+    if (transcript_item) {
+      transcript_item->AddStringItem("commandName", cmd_obj->GetCommandName());
+      transcript_item->AddStringItem("commandArguments", remainder);
+    }
+
     ElapsedTime elapsed(execute_time);
     cmd_obj->Execute(remainder.c_str(), result);
   }
@@ -2072,7 +2086,8 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
 
     transcript_item->AddStringItem("output", result.GetOutputData());
     transcript_item->AddStringItem("error", result.GetErrorData());
-    transcript_item->AddFloatItem("seconds", execute_time.get().count());
+    transcript_item->AddFloatItem("durationInSeconds",
+                                  execute_time.get().count());
   }
 
   return result.Succeeded();

diff  --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
index 7f866ae0ef324..be0848573f812 100644
--- a/lldb/source/Target/Statistics.cpp
+++ b/lldb/source/Target/Statistics.cpp
@@ -16,6 +16,7 @@
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Target/UnixSignals.h"
+#include "lldb/Utility/StructuredData.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -225,6 +226,7 @@ llvm::json::Value DebuggerStats::ReportStatistics(
 
   const bool summary_only = options.summary_only;
   const bool load_all_debug_info = options.load_all_debug_info;
+  const bool include_transcript = options.include_transcript;
 
   json::Array json_targets;
   json::Array json_modules;
@@ -364,5 +366,36 @@ llvm::json::Value DebuggerStats::ReportStatistics(
     global_stats.try_emplace("commands", std::move(cmd_stats));
   }
 
+  if (include_transcript) {
+    // When transcript is available, add it to the to-be-returned statistics.
+    //
+    // NOTE:
+    // When the statistics is polled by an LLDB command:
+    // - The transcript in the returned statistics *will NOT* contain the
+    //   returned statistics itself (otherwise infinite recursion).
+    // - The returned statistics *will* be written to the internal transcript
+    //   buffer. It *will* appear in the next statistcs or transcript poll.
+    //
+    // For example, let's say the following commands are run in order:
+    // - "version"
+    // - "statistics dump"  <- call it "A"
+    // - "statistics dump"  <- call it "B"
+    // The output of "A" will contain the transcript of "version" and
+    // "statistics dump" (A), with the latter having empty output. The output
+    // of B will contain the trascnript of "version", "statistics dump" (A),
+    // "statistics dump" (B), with A's output populated and B's output empty.
+    const StructuredData::Array &transcript =
+        debugger.GetCommandInterpreter().GetTranscript();
+    if (transcript.GetSize() != 0) {
+      std::string buffer;
+      llvm::raw_string_ostream ss(buffer);
+      json::OStream json_os(ss);
+      transcript.Serialize(json_os);
+      if (auto json_transcript = llvm::json::parse(ss.str()))
+        global_stats.try_emplace("transcript",
+                                 std::move(json_transcript.get()));
+    }
+  }
+
   return std::move(global_stats);
 }

diff  --git a/lldb/test/API/commands/statistics/basic/TestStats.py b/lldb/test/API/commands/statistics/basic/TestStats.py
index fb6fc07d2d443..ebe6166eb45e5 100644
--- a/lldb/test/API/commands/statistics/basic/TestStats.py
+++ b/lldb/test/API/commands/statistics/basic/TestStats.py
@@ -623,3 +623,50 @@ def test_had_frame_variable_errors(self):
         # Verify that the top level statistic that aggregates the number of
         # modules with debugInfoHadVariableErrors is greater than zero
         self.assertGreater(stats["totalModuleCountWithVariableErrors"], 0)
+
+    def test_transcript_happy_path(self):
+        """
+        Test "statistics dump" and the transcript information.
+        """
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        target = self.createTestTarget(file_path=exe)
+        self.runCmd("settings set interpreter.save-transcript true")
+        self.runCmd("version")
+
+        # Verify the output of a first "statistics dump"
+        debug_stats = self.get_stats("--transcript")
+        self.assertIn("transcript", debug_stats)
+        transcript = debug_stats["transcript"]
+        self.assertEqual(len(transcript), 2)
+        self.assertEqual(transcript[0]["commandName"], "version")
+        self.assertEqual(transcript[1]["commandName"], "statistics dump")
+        # The first "statistics dump" in the transcript should have no output
+        self.assertNotIn("output", transcript[1])
+
+        # Verify the output of a second "statistics dump"
+        debug_stats = self.get_stats("--transcript")
+        self.assertIn("transcript", debug_stats)
+        transcript = debug_stats["transcript"]
+        self.assertEqual(len(transcript), 3)
+        self.assertEqual(transcript[0]["commandName"], "version")
+        self.assertEqual(transcript[1]["commandName"], "statistics dump")
+        # The first "statistics dump" in the transcript should have output now
+        self.assertIn("output", transcript[1])
+        self.assertEqual(transcript[2]["commandName"], "statistics dump")
+        # The second "statistics dump" in the transcript should have no output
+        self.assertNotIn("output", transcript[2])
+
+    def test_transcript_should_not_exist_when_not_asked_for(self):
+        """
+        Test "statistics dump" and the transcript information.
+        """
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        target = self.createTestTarget(file_path=exe)
+        self.runCmd("settings set interpreter.save-transcript true")
+        self.runCmd("version")
+
+        # Verify the output of a first "statistics dump"
+        debug_stats = self.get_stats()  # Not with "--transcript"
+        self.assertNotIn("transcript", debug_stats)

diff  --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 95643eef0d344..6814ccec16c48 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -104,7 +104,7 @@ def getTranscriptAsPythonObject(self, ci):
 
         return json.loads(stream.GetData())
 
-    def test_structured_transcript(self):
+    def test_get_transcript(self):
         """Test structured transcript generation and retrieval."""
         ci = self.buildAndCreateTarget()
 
@@ -118,7 +118,7 @@ def test_structured_transcript(self):
         res = lldb.SBCommandReturnObject()
         ci.HandleCommand("version", res)
         ci.HandleCommand("an-unknown-command", res)
-        ci.HandleCommand("breakpoint set -f main.c -l %d" % self.line, res)
+        ci.HandleCommand("br s -f main.c -l %d" % self.line, res)
         ci.HandleCommand("r", res)
         ci.HandleCommand("p a", res)
         ci.HandleCommand("statistics dump", res)
@@ -130,22 +130,28 @@ def test_structured_transcript(self):
         # All commands should have expected fields.
         for command in transcript:
             self.assertIn("command", command)
+            # Unresolved commands don't have "commandName"/"commandArguments".
+            # We will validate these fields below, instead of here.
             self.assertIn("output", command)
             self.assertIn("error", command)
-            self.assertIn("seconds", command)
+            self.assertIn("durationInSeconds", command)
+            self.assertIn("timestampInEpochSeconds", command)
 
         # The following validates individual commands in the transcript.
         #
         # Notes:
         # 1. Some of the asserts rely on the exact output format of the
         #    commands. Hopefully we are not changing them any time soon.
-        # 2. We are removing the "seconds" field from each command, so that
-        #    some of the validations below can be easier / more readable.
+        # 2. We are removing the time-related fields from each command, so
+        #    that some of the validations below can be easier / more readable.
         for command in transcript:
-            del(command["seconds"])
+            del command["durationInSeconds"]
+            del command["timestampInEpochSeconds"]
 
         # (lldb) version
         self.assertEqual(transcript[0]["command"], "version")
+        self.assertEqual(transcript[0]["commandName"], "version")
+        self.assertEqual(transcript[0]["commandArguments"], "")
         self.assertIn("lldb version", transcript[0]["output"])
         self.assertEqual(transcript[0]["error"], "")
 
@@ -153,18 +159,25 @@ def test_structured_transcript(self):
         self.assertEqual(transcript[1],
             {
                 "command": "an-unknown-command",
+                # Unresolved commands don't have "commandName"/"commandArguments"
                 "output": "",
                 "error": "error: 'an-unknown-command' is not a valid command.\n",
             })
 
-        # (lldb) breakpoint set -f main.c -l <line>
-        self.assertEqual(transcript[2]["command"], "breakpoint set -f main.c -l %d" % self.line)
+        # (lldb) br s -f main.c -l <line>
+        self.assertEqual(transcript[2]["command"], "br s -f main.c -l %d" % self.line)
+        self.assertEqual(transcript[2]["commandName"], "breakpoint set")
+        self.assertEqual(
+            transcript[2]["commandArguments"], "-f main.c -l %d" % self.line
+        )
         # Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address = 0x0000000100000f7d
         self.assertIn("Breakpoint 1: where = a.out`main ", transcript[2]["output"])
         self.assertEqual(transcript[2]["error"], "")
 
         # (lldb) r
         self.assertEqual(transcript[3]["command"], "r")
+        self.assertEqual(transcript[3]["commandName"], "process launch")
+        self.assertEqual(transcript[3]["commandArguments"], "-X true --")
         # Process 25494 launched: '<path>/TestCommandInterpreterAPI.test_structured_transcript/a.out' (x86_64)
         self.assertIn("Process", transcript[3]["output"])
         self.assertIn("launched", transcript[3]["output"])
@@ -174,11 +187,17 @@ def test_structured_transcript(self):
         self.assertEqual(transcript[4],
             {
                 "command": "p a",
+                "commandName": "dwim-print",
+                "commandArguments": "-- a",
                 "output": "(int) 123\n",
                 "error": "",
             })
 
         # (lldb) statistics dump
+        self.assertEqual(transcript[5]["command"], "statistics dump")
+        self.assertEqual(transcript[5]["commandName"], "statistics dump")
+        self.assertEqual(transcript[5]["commandArguments"], "")
+        self.assertEqual(transcript[5]["error"], "")
         statistics_dump = json.loads(transcript[5]["output"])
         # Dump result should be valid JSON
         self.assertTrue(statistics_dump is not json.JSONDecodeError)
@@ -194,7 +213,6 @@ def test_save_transcript_setting_default(self):
 
         # The setting's default value should be "false"
         self.runCmd("settings show interpreter.save-transcript", "interpreter.save-transcript (boolean) = false\n")
-        # self.assertEqual(res.GetOutput(), )
 
     def test_save_transcript_setting_off(self):
         ci = self.buildAndCreateTarget()
@@ -220,7 +238,7 @@ def test_save_transcript_setting_on(self):
         self.assertEqual(len(transcript), 1)
         self.assertEqual(transcript[0]["command"], "version")
 
-    def test_save_transcript_returns_copy(self):
+    def test_get_transcript_returns_copy(self):
         """
         Test that the returned structured data is *at least* a shallow copy.