[Lldb-commits] [lldb] Add new Python API `SBCommandInterpreter::GetTranscript()` (PR #90703)

via lldb-commits lldb-commits at lists.llvm.org
Thu May 2 21:09:17 PDT 2024


https://github.com/royitaqi updated https://github.com/llvm/llvm-project/pull/90703

>From 0fd67e2de7e702ce6f7353845454ea7ff9f980d6 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Tue, 30 Apr 2024 21:35:49 -0700
Subject: [PATCH 01/10] Add SBCommandInterpreter::GetTranscript()

---
 lldb/include/lldb/API/SBCommandInterpreter.h | 12 +++++++++---
 lldb/source/API/SBCommandInterpreter.cpp     |  7 ++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h
index ba2e049204b8e6..d65f06d676f91f 100644
--- a/lldb/include/lldb/API/SBCommandInterpreter.h
+++ b/lldb/include/lldb/API/SBCommandInterpreter.h
@@ -247,13 +247,13 @@ class SBCommandInterpreter {
                                        lldb::SBStringList &matches,
                                        lldb::SBStringList &descriptions);
 
-  /// Returns whether an interrupt flag was raised either by the SBDebugger - 
+  /// Returns whether an interrupt flag was raised either by the SBDebugger -
   /// when the function is not running on the RunCommandInterpreter thread, or
   /// by SBCommandInterpreter::InterruptCommand if it is.  If your code is doing
-  /// interruptible work, check this API periodically, and interrupt if it 
+  /// interruptible work, check this API periodically, and interrupt if it
   /// returns true.
   bool WasInterrupted() const;
-  
+
   /// Interrupts the command currently executing in the RunCommandInterpreter
   /// thread.
   ///
@@ -318,6 +318,12 @@ class SBCommandInterpreter {
 
   SBStructuredData GetStatistics();
 
+  /// Returns a list of handled commands, output and error. Each element in
+  /// the list is a dictionary with three keys: "command" (string), "output"
+  /// (list of strings) and optionally "error" (list of strings). Each string
+  /// in "output" and "error" is a line (without EOL characteres).
+  SBStructuredData GetTranscript();
+
 protected:
   friend class lldb_private::CommandPluginInterfaceImplementation;
 
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index 83c0951c56db60..242b3f8f09c48a 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -150,7 +150,7 @@ bool SBCommandInterpreter::WasInterrupted() const {
 
 bool SBCommandInterpreter::InterruptCommand() {
   LLDB_INSTRUMENT_VA(this);
-  
+
   return (IsValid() ? m_opaque_ptr->InterruptCommand() : false);
 }
 
@@ -571,6 +571,11 @@ SBStructuredData SBCommandInterpreter::GetStatistics() {
   return data;
 }
 
+SBStructuredData SBCommandInterpreter::GetTranscript() {
+  LLDB_INSTRUMENT_VA(this);
+  return SBStructuredData();
+}
+
 lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
                                                           const char *help) {
   LLDB_INSTRUMENT_VA(this, name, help);

>From a1c948ceabaccdc3407e0c4eae0ebc594a9b68b7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 13:45:47 -0700
Subject: [PATCH 02/10] Implement the new API

---
 .../lldb/Interpreter/CommandInterpreter.h     | 12 +++++--
 lldb/include/lldb/Utility/StructuredData.h    | 11 +++---
 lldb/source/API/SBCommandInterpreter.cpp      |  8 ++++-
 .../source/Interpreter/CommandInterpreter.cpp | 21 ++++++++++-
 lldb/source/Utility/StructuredData.cpp        | 35 +++++++++++++++++++
 5 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 70a55a77465bfe..9474c41c0dcedd 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -22,6 +22,7 @@
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/StreamString.h"
 #include "lldb/Utility/StringList.h"
+#include "lldb/Utility/StructuredData.h"
 #include "lldb/lldb-forward.h"
 #include "lldb/lldb-private.h"
 
@@ -241,7 +242,7 @@ class CommandInterpreter : public Broadcaster,
     eCommandTypesAllThem = 0xFFFF  //< all commands
   };
 
-  // The CommandAlias and CommandInterpreter both have a hand in 
+  // The CommandAlias and CommandInterpreter both have a hand in
   // substituting for alias commands.  They work by writing special tokens
   // in the template form of the Alias command, and then detecting them when the
   // command is executed.  These are the special tokens:
@@ -576,7 +577,7 @@ class CommandInterpreter : public Broadcaster,
   void SetEchoCommentCommands(bool enable);
 
   bool GetRepeatPreviousCommand() const;
-  
+
   bool GetRequireCommandOverwrite() const;
 
   const CommandObject::CommandMap &GetUserCommands() const {
@@ -647,6 +648,7 @@ class CommandInterpreter : public Broadcaster,
   }
 
   llvm::json::Value GetStatistics();
+  StructuredData::ArraySP GetTranscript() const;
 
 protected:
   friend class Debugger;
@@ -766,6 +768,12 @@ class CommandInterpreter : public Broadcaster,
   CommandUsageMap m_command_usages;
 
   StreamString m_transcript_stream;
+
+  /// Contains a list of handled commands, output and error. Each element in
+  /// the list is a dictionary with three keys: "command" (string), "output"
+  /// (list of strings) and optionally "error" (list of strings). Each string
+  /// in "output" and "error" is a line (without EOL characteres).
+  StructuredData::ArraySP m_transcript_structured;
 };
 
 } // namespace lldb_private
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 5e63ef92fac3ec..72fd035c23e47e 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,6 +290,9 @@ class StructuredData {
 
     void GetDescription(lldb_private::Stream &s) const override;
 
+    static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit,
+                               bool keepEmpty);
+
   protected:
     typedef std::vector<ObjectSP> collection;
     collection m_items;
@@ -366,10 +369,10 @@ class StructuredData {
   class String : public Object {
   public:
     String() : Object(lldb::eStructuredDataTypeString) {}
-    explicit String(llvm::StringRef S)
-        : Object(lldb::eStructuredDataTypeString), m_value(S) {}
+    explicit String(llvm::StringRef s)
+        : Object(lldb::eStructuredDataTypeString), m_value(s) {}
 
-    void SetValue(llvm::StringRef S) { m_value = std::string(S); }
+    void SetValue(llvm::StringRef s) { m_value = std::string(s); }
 
     llvm::StringRef GetValue() { return m_value; }
 
@@ -432,7 +435,7 @@ class StructuredData {
       }
       return success;
     }
-      
+
     template <class IntType>
     bool GetValueForKeyAsInteger(llvm::StringRef key, IntType &result) const {
       ObjectSP value_sp = GetValueForKey(key);
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index 242b3f8f09c48a..e96b5a047c64d5 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -573,7 +573,13 @@ SBStructuredData SBCommandInterpreter::GetStatistics() {
 
 SBStructuredData SBCommandInterpreter::GetTranscript() {
   LLDB_INSTRUMENT_VA(this);
-  return SBStructuredData();
+
+  SBStructuredData data;
+  if (!IsValid())
+    return data;
+
+  data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
+  return data;
 }
 
 lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 4c58ecc3c1848f..b5f726d3234655 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -51,6 +51,7 @@
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/State.h"
 #include "lldb/Utility/Stream.h"
+#include "lldb/Utility/StructuredData.h"
 #include "lldb/Utility/Timer.h"
 
 #include "lldb/Host/Config.h"
@@ -135,7 +136,8 @@ CommandInterpreter::CommandInterpreter(Debugger &debugger,
       m_skip_lldbinit_files(false), m_skip_app_init_files(false),
       m_comment_char('#'), m_batch_command_mode(false),
       m_truncation_warning(eNoOmission), m_max_depth_warning(eNoOmission),
-      m_command_source_depth(0) {
+      m_command_source_depth(0),
+      m_transcript_structured(std::make_shared<StructuredData::Array>()) {
   SetEventName(eBroadcastBitThreadShouldExit, "thread-should-exit");
   SetEventName(eBroadcastBitResetPrompt, "reset-prompt");
   SetEventName(eBroadcastBitQuitCommandReceived, "quit");
@@ -1891,6 +1893,10 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
 
   m_transcript_stream << "(lldb) " << command_line << '\n';
 
+  auto transcript_item = std::make_shared<StructuredData::Dictionary>();
+  transcript_item->AddStringItem("command", command_line);
+  m_transcript_structured->AddItem(transcript_item);
+
   bool empty_command = false;
   bool comment_command = false;
   if (command_string.empty())
@@ -2044,6 +2050,15 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
   m_transcript_stream << result.GetOutputData();
   m_transcript_stream << result.GetErrorData();
 
+  // Add output and error to the transcript item after splitting lines. In the
+  // future, other aspects of the command (e.g. perf) can be added, too.
+  transcript_item->AddItem(
+      "output", StructuredData::Array::SplitString(result.GetOutputData(), '\n',
+                                                   -1, false));
+  transcript_item->AddItem(
+      "error", StructuredData::Array::SplitString(result.GetErrorData(), '\n',
+                                                  -1, false));
+
   return result.Succeeded();
 }
 
@@ -3554,3 +3569,7 @@ llvm::json::Value CommandInterpreter::GetStatistics() {
     stats.try_emplace(command_usage.getKey(), command_usage.getValue());
   return stats;
 }
+
+StructuredData::ArraySP CommandInterpreter::GetTranscript() const {
+  return m_transcript_structured;
+}
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7686d052c599c6..278ec93168926a 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -10,10 +10,13 @@
 #include "lldb/Utility/FileSpec.h"
 #include "lldb/Utility/Status.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cerrno>
 #include <cinttypes>
 #include <cstdlib>
+#include <memory>
+#include <sstream>
 
 using namespace lldb_private;
 using namespace llvm;
@@ -289,3 +292,35 @@ void StructuredData::Null::GetDescription(lldb_private::Stream &s) const {
 void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const {
   s.Printf("%p", m_object);
 }
+
+/// This is the same implementation as `StringRef::split`. Not depending on
+/// `StringRef::split` because it will involve a temporary `SmallVectorImpl`.
+StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
+                                                           char separator,
+                                                           int maxSplit,
+                                                           bool keepEmpty) {
+  auto array_sp = std::make_shared<StructuredData::Array>();
+
+  // Count down from MaxSplit. When MaxSplit is -1, this will just split
+  // "forever". This doesn't support splitting more than 2^31 times
+  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+  // but that seems unlikely to be useful.
+  while (maxSplit-- != 0) {
+    size_t idx = s.find(separator);
+    if (idx == llvm::StringLiteral::npos)
+      break;
+
+    // Push this split.
+    if (keepEmpty || idx > 0)
+      array_sp->AddStringItem(s.slice(0, idx));
+
+    // Jump forward.
+    s = s.slice(idx + 1, llvm::StringLiteral::npos);
+  }
+
+  // Push the tail.
+  if (keepEmpty || !s.empty())
+    array_sp->AddStringItem(s);
+
+  return array_sp;
+}

>From efc1c2037da00dacddc3e52812f93377d41d4f82 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 14:45:48 -0700
Subject: [PATCH 03/10] Add unittest

---
 .../interpreter/TestCommandInterpreterAPI.py  | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 8f9fbfc255bb02..93d36e3388941c 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -1,5 +1,6 @@
 """Test the SBCommandInterpreter APIs."""
 
+import json
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
@@ -85,3 +86,44 @@ def test_command_output(self):
         self.assertEqual(res.GetOutput(), "")
         self.assertIsNotNone(res.GetError())
         self.assertEqual(res.GetError(), "")
+
+    def test_structured_transcript(self):
+        """Test structured transcript generation and retrieval."""
+        ci = self.dbg.GetCommandInterpreter()
+        self.assertTrue(ci, VALID_COMMAND_INTERPRETER)
+
+        # Send a few commands through the command interpreter
+        res = lldb.SBCommandReturnObject()
+        ci.HandleCommand("version", res)
+        ci.HandleCommand("an-unknown-command", res)
+
+        # Retrieve the transcript and convert it into a Python object
+        transcript = ci.GetTranscript()
+        self.assertTrue(transcript.IsValid())
+
+        stream = lldb.SBStream()
+        self.assertTrue(stream)
+
+        error = transcript.GetAsJSON(stream)
+        self.assertSuccess(error)
+
+        transcript = json.loads(stream.GetData())
+
+        # Validate the transcript.
+        #
+        # Notes:
+        # 1. The following asserts rely on the exact output format of the
+        #    commands. Hopefully we are not changing them any time soon.
+        # 2. The transcript will contain a bunch of commands that are run
+        #    automatically. We only want to validate for the ones that are
+        #    handled in the above, hence the negative indices to find them.
+        self.assertEqual(transcript[-2]["command"], "version")
+        self.assertTrue("lldb version" in transcript[-2]["output"][0])
+        self.assertEqual(transcript[-1],
+            {
+                "command": "an-unknown-command",
+                "output": [],
+                "error": [
+                    "error: 'an-unknown-command' is not a valid command.",
+                ],
+            })

>From 6d1190df0ecae0fa49519545526636e84ee9b394 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 15:20:38 -0700
Subject: [PATCH 04/10] Add more test asserts and some touch ups

---
 .../lldb/Interpreter/CommandInterpreter.h     |  2 +-
 .../source/Interpreter/CommandInterpreter.cpp |  2 +
 lldb/source/Utility/StructuredData.cpp        |  2 -
 .../interpreter/TestCommandInterpreterAPI.py  | 63 ++++++++++++++++---
 lldb/test/API/python_api/interpreter/main.c   |  5 +-
 5 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 9474c41c0dcedd..c0846db8f2b8a2 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -772,7 +772,7 @@ class CommandInterpreter : public Broadcaster,
   /// Contains a list of handled commands, output and error. Each element in
   /// the list is a dictionary with three keys: "command" (string), "output"
   /// (list of strings) and optionally "error" (list of strings). Each string
-  /// in "output" and "error" is a line (without EOL characteres).
+  /// in "output" and "error" is a line (without EOL characters).
   StructuredData::ArraySP m_transcript_structured;
 };
 
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index b5f726d3234655..1ec1da437ba3ac 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -1893,6 +1893,8 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
 
   m_transcript_stream << "(lldb) " << command_line << '\n';
 
+  // The same `transcript_item` will be used below to add output and error of
+  // the command.
   auto transcript_item = std::make_shared<StructuredData::Dictionary>();
   transcript_item->AddStringItem("command", command_line);
   m_transcript_structured->AddItem(transcript_item);
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 278ec93168926a..7870334d708fe9 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -15,8 +15,6 @@
 #include <cerrno>
 #include <cinttypes>
 #include <cstdlib>
-#include <memory>
-#include <sstream>
 
 using namespace lldb_private;
 using namespace llvm;
diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 93d36e3388941c..e5cb4a18f7df6f 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -89,6 +89,13 @@ def test_command_output(self):
 
     def test_structured_transcript(self):
         """Test structured transcript generation and retrieval."""
+        # Get command interpreter and create a target
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
         ci = self.dbg.GetCommandInterpreter()
         self.assertTrue(ci, VALID_COMMAND_INTERPRETER)
 
@@ -96,6 +103,10 @@ def test_structured_transcript(self):
         res = lldb.SBCommandReturnObject()
         ci.HandleCommand("version", res)
         ci.HandleCommand("an-unknown-command", res)
+        ci.HandleCommand("breakpoint set -f main.c -l %d" % self.line, res)
+        ci.HandleCommand("r", res)
+        ci.HandleCommand("p a", res)
+        total_number_of_commands = 5
 
         # Retrieve the transcript and convert it into a Python object
         transcript = ci.GetTranscript()
@@ -109,17 +120,25 @@ def test_structured_transcript(self):
 
         transcript = json.loads(stream.GetData())
 
+        # The transcript will contain a bunch of commands that are run
+        # automatically. We only want to validate for the ones that are
+        # listed above, hence trimming to the last parts.
+        transcript = transcript[-total_number_of_commands:]
+
+        print(transcript)
+
         # Validate the transcript.
         #
-        # Notes:
-        # 1. The following asserts rely on the exact output format of the
-        #    commands. Hopefully we are not changing them any time soon.
-        # 2. The transcript will contain a bunch of commands that are run
-        #    automatically. We only want to validate for the ones that are
-        #    handled in the above, hence the negative indices to find them.
-        self.assertEqual(transcript[-2]["command"], "version")
-        self.assertTrue("lldb version" in transcript[-2]["output"][0])
-        self.assertEqual(transcript[-1],
+        # The following asserts rely on the exact output format of the
+        # commands. Hopefully we are not changing them any time soon.
+
+        # (lldb) version
+        self.assertEqual(transcript[0]["command"], "version")
+        self.assertTrue("lldb version" in transcript[0]["output"][0])
+        self.assertEqual(transcript[0]["error"], [])
+
+        # (lldb) an-unknown-command
+        self.assertEqual(transcript[1],
             {
                 "command": "an-unknown-command",
                 "output": [],
@@ -127,3 +146,29 @@ def test_structured_transcript(self):
                     "error: 'an-unknown-command' is not a valid command.",
                 ],
             })
+
+        # (lldb) breakpoint set -f main.c -l X
+        self.assertEqual(transcript[2],
+            {
+                "command": "breakpoint set -f main.c -l %d" % self.line,
+                "output": [
+                    "Breakpoint 1: where = a.out`main + 29 at main.c:5:5, address = 0x0000000100000f7d",
+                ],
+                "error": [],
+            })
+
+        # (lldb) r
+        self.assertEqual(transcript[3]["command"], "r")
+        self.assertTrue("Process" in transcript[3]["output"][0])
+        self.assertTrue("launched" in transcript[3]["output"][0])
+        self.assertEqual(transcript[3]["error"], [])
+
+        # (lldb) p a
+        self.assertEqual(transcript[4],
+            {
+                "command": "p a",
+                "output": [
+                    "(int) 123",
+                ],
+                "error": [],
+            })
diff --git a/lldb/test/API/python_api/interpreter/main.c b/lldb/test/API/python_api/interpreter/main.c
index 277aa54a4eea52..366ffde5fdef51 100644
--- a/lldb/test/API/python_api/interpreter/main.c
+++ b/lldb/test/API/python_api/interpreter/main.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 
 int main(int argc, char const *argv[]) {
-    printf("Hello world.\n");
-    return 0;
+  int a = 123;
+  printf("Hello world.\n");
+  return 0;
 }

>From 26a726b2f94713ef8508049115ab93ee91e9a836 Mon Sep 17 00:00:00 2001
From: royitaqi <royitaqi at users.noreply.github.com>
Date: Wed, 1 May 2024 15:27:33 -0700
Subject: [PATCH 05/10] Apply suggestions from code review

Co-authored-by: Med Ismail Bennani <ismail at bennani.ma>
---
 lldb/source/API/SBCommandInterpreter.cpp | 6 ++----
 lldb/source/Utility/StructuredData.cpp   | 4 ++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index e96b5a047c64d5..233a2f97fb9f15 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -575,10 +575,8 @@ SBStructuredData SBCommandInterpreter::GetTranscript() {
   LLDB_INSTRUMENT_VA(this);
 
   SBStructuredData data;
-  if (!IsValid())
-    return data;
-
-  data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
+  if (IsValid())
+    data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
   return data;
 }
 
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7870334d708fe9..4ca804cb76a74b 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -299,9 +299,9 @@ StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
                                                            bool keepEmpty) {
   auto array_sp = std::make_shared<StructuredData::Array>();
 
-  // Count down from MaxSplit. When MaxSplit is -1, this will just split
+  // Count down from `maxSplit`. When `maxSplit` is -1, this will just split
   // "forever". This doesn't support splitting more than 2^31 times
-  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+  // intentionally; if we ever want that we can make `maxSplit` a 64-bit integer
   // but that seems unlikely to be useful.
   while (maxSplit-- != 0) {
     size_t idx = s.find(separator);

>From 52a310b8c236d252233b6e49de48a0c53eab9f45 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 16:07:44 -0700
Subject: [PATCH 06/10] Move and add documentation for Array::SplitString

---
 lldb/include/lldb/Utility/StructuredData.h | 25 ++++++++++++++++++++--
 lldb/source/Utility/StructuredData.cpp     |  2 --
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 72fd035c23e47e..69db0caca20510 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,8 +290,29 @@ class StructuredData {
 
     void GetDescription(lldb_private::Stream &s) const override;
 
-    static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit,
-                               bool keepEmpty);
+    /// Creates an Array of substrings by splitting a string around the occurrences of a separator character.
+    ///
+    /// Note:
+    /// * This is almost the same API and implementation as `StringRef::split`.
+    /// * Not depending on `StringRef::split` because it will involve a
+    ///   temporary `SmallVectorImpl`.
+    ///
+    /// \param[in] s
+    ///   The input string.
+    ///
+    /// \param[in] separator
+    ///   The character to split on.
+    ///
+    /// \param[in] maxSplit
+    ///   The maximum number of times the string is split. If \a maxSplit is >= 0, at most \a maxSplit splits are done and consequently <= \a maxSplit + 1 elements are returned.
+    ///
+    /// \param[in] keepEmpty
+    ///   True if empty substrings should be returned. Empty substrings still count when considering \a maxSplit.
+    ///
+    /// \return
+    ///   An array containing the substrings. If \a maxSplit == -1 and \a keepEmpty == true, then the concatination of the array forms the input string.
+    static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit = -1,
+                               bool keepEmpty = true);
 
   protected:
     typedef std::vector<ObjectSP> collection;
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 4ca804cb76a74b..7fa1063e5f01fa 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -291,8 +291,6 @@ void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const {
   s.Printf("%p", m_object);
 }
 
-/// This is the same implementation as `StringRef::split`. Not depending on
-/// `StringRef::split` because it will involve a temporary `SmallVectorImpl`.
 StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
                                                            char separator,
                                                            int maxSplit,

>From 9beff0b2fdbac700f2aec6047ea90356ffecbce7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 16:53:25 -0700
Subject: [PATCH 07/10] Add unit test for Array::SplitString

---
 lldb/unittests/Utility/StructuredDataTest.cpp | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/lldb/unittests/Utility/StructuredDataTest.cpp b/lldb/unittests/Utility/StructuredDataTest.cpp
index e536039f365a4b..f1074909463343 100644
--- a/lldb/unittests/Utility/StructuredDataTest.cpp
+++ b/lldb/unittests/Utility/StructuredDataTest.cpp
@@ -12,6 +12,7 @@
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/StreamString.h"
 #include "lldb/Utility/StructuredData.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Path.h"
 
 using namespace lldb;
@@ -112,3 +113,65 @@ TEST(StructuredDataTest, ParseJSONFromFile) {
   object_sp->Dump(S, false);
   EXPECT_EQ("[1,2,3]", S.GetString());
 }
+
+struct ArraySplitStringTestCase {
+  llvm::StringRef s;
+  char separator;
+  int maxSplit;
+  bool keepEmpty;
+  std::vector<std::string> expected;
+};
+
+TEST(StructuredDataTest, ArraySplitString) {
+  ArraySplitStringTestCase test_cases[] = {
+      // Happy path
+      {
+          "1,2,,3",
+          ',',
+          -1,
+          true,
+          {"1", "2", "", "3"},
+      },
+      // No splits
+      {
+          "1,2,,3",
+          ',',
+          0,
+          true,
+          {"1,2,,3"},
+      },
+      // 1 split
+      {
+          "1,2,,3",
+          ',',
+          1,
+          true,
+          {"1", "2,,3"},
+      },
+      // No empty substrings
+      {
+          "1,2,,3",
+          ',',
+          -1,
+          false,
+          {"1", "2", "3"},
+      },
+      // Empty substrings count towards splits
+      {
+          ",1,2,3",
+          ',',
+          1,
+          false,
+          {"1,2,3"},
+      },
+  };
+  for (const auto &test_case : test_cases) {
+    auto array = StructuredData::Array::SplitString(
+        test_case.s, test_case.separator, test_case.maxSplit,
+        test_case.keepEmpty);
+    EXPECT_EQ(test_case.expected.size(), array->GetSize());
+    for (unsigned int i = 0; i < test_case.expected.size(); ++i) {
+      EXPECT_EQ(test_case.expected[i], array->GetItemAtIndexAsString(i)->str());
+    }
+  }
+}

>From 6c6c5c272511f8c62c7ef14eefe67904875b4d2c Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 17:07:12 -0700
Subject: [PATCH 08/10] Fix format

---
 lldb/include/lldb/Utility/StructuredData.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 69db0caca20510..8217d2bf33b808 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,7 +290,8 @@ class StructuredData {
 
     void GetDescription(lldb_private::Stream &s) const override;
 
-    /// Creates an Array of substrings by splitting a string around the occurrences of a separator character.
+    /// Creates an Array of substrings by splitting a string around the
+    /// occurrences of a separator character.
     ///
     /// Note:
     /// * This is almost the same API and implementation as `StringRef::split`.
@@ -304,15 +305,20 @@ class StructuredData {
     ///   The character to split on.
     ///
     /// \param[in] maxSplit
-    ///   The maximum number of times the string is split. If \a maxSplit is >= 0, at most \a maxSplit splits are done and consequently <= \a maxSplit + 1 elements are returned.
+    ///   The maximum number of times the string is split. If \a maxSplit is >=
+    ///   0, at most \a maxSplit splits are done and consequently <= \a maxSplit
+    ///   + 1 elements are returned.
     ///
     /// \param[in] keepEmpty
-    ///   True if empty substrings should be returned. Empty substrings still count when considering \a maxSplit.
+    ///   True if empty substrings should be returned. Empty substrings still
+    ///   count when considering \a maxSplit.
     ///
     /// \return
-    ///   An array containing the substrings. If \a maxSplit == -1 and \a keepEmpty == true, then the concatination of the array forms the input string.
-    static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit = -1,
-                               bool keepEmpty = true);
+    ///   An array containing the substrings. If \a maxSplit == -1 and \a
+    ///   keepEmpty == true, then the concatination of the array forms the input
+    ///   string.
+    static ArraySP SplitString(llvm::StringRef s, char separator,
+                               int maxSplit = -1, bool keepEmpty = true);
 
   protected:
     typedef std::vector<ObjectSP> collection;

>From fbf8e9ab4cfe232218edb1eb4fe0c22d9a68f4a9 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 17:23:14 -0700
Subject: [PATCH 09/10] Improve Python API test reliability

---
 .../interpreter/TestCommandInterpreterAPI.py    | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index e5cb4a18f7df6f..5bb9f579ad13f5 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -125,8 +125,6 @@ def test_structured_transcript(self):
         # listed above, hence trimming to the last parts.
         transcript = transcript[-total_number_of_commands:]
 
-        print(transcript)
-
         # Validate the transcript.
         #
         # The following asserts rely on the exact output format of the
@@ -147,18 +145,15 @@ def test_structured_transcript(self):
                 ],
             })
 
-        # (lldb) breakpoint set -f main.c -l X
-        self.assertEqual(transcript[2],
-            {
-                "command": "breakpoint set -f main.c -l %d" % self.line,
-                "output": [
-                    "Breakpoint 1: where = a.out`main + 29 at main.c:5:5, address = 0x0000000100000f7d",
-                ],
-                "error": [],
-            })
+        # (lldb) breakpoint set -f main.c -l <line>
+        self.assertEqual(transcript[2]["command"], "breakpoint set -f main.c -l %d" % self.line)
+        # Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address = 0x0000000100000f7d
+        self.assertTrue("Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address =" in transcript[2]["output"][0])
+        self.assertEqual(transcript[2]["error"], [])
 
         # (lldb) r
         self.assertEqual(transcript[3]["command"], "r")
+        # Process 25494 launched: '<path>/TestCommandInterpreterAPI.test_structured_transcript/a.out' (x86_64)
         self.assertTrue("Process" in transcript[3]["output"][0])
         self.assertTrue("launched" in transcript[3]["output"][0])
         self.assertEqual(transcript[3]["error"], [])

>From 836a719f0f2124627ebb0ea625486bcc8cdf3cd5 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 2 May 2024 21:08:46 -0700
Subject: [PATCH 10/10] Update implementation of Array::SplitString, and other
 smaller changes

---
 lldb/include/lldb/API/SBCommandInterpreter.h  |  2 +-
 .../lldb/Interpreter/CommandInterpreter.h     |  2 +-
 lldb/include/lldb/Utility/StructuredData.h    | 11 ++------
 .../source/Interpreter/CommandInterpreter.cpp |  6 ++--
 lldb/source/Utility/StructuredData.cpp        | 28 +++++--------------
 5 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h
index d65f06d676f91f..f56f7d844b0d13 100644
--- a/lldb/include/lldb/API/SBCommandInterpreter.h
+++ b/lldb/include/lldb/API/SBCommandInterpreter.h
@@ -321,7 +321,7 @@ class SBCommandInterpreter {
   /// Returns a list of handled commands, output and error. Each element in
   /// the list is a dictionary with three keys: "command" (string), "output"
   /// (list of strings) and optionally "error" (list of strings). Each string
-  /// in "output" and "error" is a line (without EOL characteres).
+  /// in "output" and "error" is a line (without EOL characters).
   SBStructuredData GetTranscript();
 
 protected:
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index c0846db8f2b8a2..0938ad6ae78ab4 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -773,7 +773,7 @@ class CommandInterpreter : public Broadcaster,
   /// the list is a dictionary with three keys: "command" (string), "output"
   /// (list of strings) and optionally "error" (list of strings). Each string
   /// in "output" and "error" is a line (without EOL characters).
-  StructuredData::ArraySP m_transcript_structured;
+  StructuredData::ArraySP m_transcript;
 };
 
 } // namespace lldb_private
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 8217d2bf33b808..563eb1b1a284af 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -293,11 +293,6 @@ class StructuredData {
     /// Creates an Array of substrings by splitting a string around the
     /// occurrences of a separator character.
     ///
-    /// Note:
-    /// * This is almost the same API and implementation as `StringRef::split`.
-    /// * Not depending on `StringRef::split` because it will involve a
-    ///   temporary `SmallVectorImpl`.
-    ///
     /// \param[in] s
     ///   The input string.
     ///
@@ -396,10 +391,10 @@ class StructuredData {
   class String : public Object {
   public:
     String() : Object(lldb::eStructuredDataTypeString) {}
-    explicit String(llvm::StringRef s)
-        : Object(lldb::eStructuredDataTypeString), m_value(s) {}
+    explicit String(llvm::StringRef S)
+        : Object(lldb::eStructuredDataTypeString), m_value(S) {}
 
-    void SetValue(llvm::StringRef s) { m_value = std::string(s); }
+    void SetValue(llvm::StringRef S) { m_value = std::string(S); }
 
     llvm::StringRef GetValue() { return m_value; }
 
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 1ec1da437ba3ac..d5e37dee186a62 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -137,7 +137,7 @@ CommandInterpreter::CommandInterpreter(Debugger &debugger,
       m_comment_char('#'), m_batch_command_mode(false),
       m_truncation_warning(eNoOmission), m_max_depth_warning(eNoOmission),
       m_command_source_depth(0),
-      m_transcript_structured(std::make_shared<StructuredData::Array>()) {
+      m_transcript(std::make_shared<StructuredData::Array>()) {
   SetEventName(eBroadcastBitThreadShouldExit, "thread-should-exit");
   SetEventName(eBroadcastBitResetPrompt, "reset-prompt");
   SetEventName(eBroadcastBitQuitCommandReceived, "quit");
@@ -1897,7 +1897,7 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
   // the command.
   auto transcript_item = std::make_shared<StructuredData::Dictionary>();
   transcript_item->AddStringItem("command", command_line);
-  m_transcript_structured->AddItem(transcript_item);
+  m_transcript->AddItem(transcript_item);
 
   bool empty_command = false;
   bool comment_command = false;
@@ -3573,5 +3573,5 @@ llvm::json::Value CommandInterpreter::GetStatistics() {
 }
 
 StructuredData::ArraySP CommandInterpreter::GetTranscript() const {
-  return m_transcript_structured;
+  return m_transcript;
 }
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7fa1063e5f01fa..e5f9a69fab2ab6 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -295,28 +295,14 @@ StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
                                                            char separator,
                                                            int maxSplit,
                                                            bool keepEmpty) {
-  auto array_sp = std::make_shared<StructuredData::Array>();
+  // Split the string into a small vector.
+  llvm::SmallVector<StringRef> small_vec;
+  s.split(small_vec, separator, maxSplit, keepEmpty);
 
-  // Count down from `maxSplit`. When `maxSplit` is -1, this will just split
-  // "forever". This doesn't support splitting more than 2^31 times
-  // intentionally; if we ever want that we can make `maxSplit` a 64-bit integer
-  // but that seems unlikely to be useful.
-  while (maxSplit-- != 0) {
-    size_t idx = s.find(separator);
-    if (idx == llvm::StringLiteral::npos)
-      break;
-
-    // Push this split.
-    if (keepEmpty || idx > 0)
-      array_sp->AddStringItem(s.slice(0, idx));
-
-    // Jump forward.
-    s = s.slice(idx + 1, llvm::StringLiteral::npos);
+  // Copy the substrings from the small vector into the output array.
+  auto array_sp = std::make_shared<StructuredData::Array>();
+  for (auto substring : small_vec) {
+    array_sp->AddStringItem(std::move(substring));
   }
-
-  // Push the tail.
-  if (keepEmpty || !s.empty())
-    array_sp->AddStringItem(s);
-
   return array_sp;
 }



More information about the lldb-commits mailing list