[Lldb-commits] [lldb] Add new Python API `SBCommandInterpreter::GetTranscript()` (PR #90703)
via lldb-commits
lldb-commits at lists.llvm.org
Mon May 6 10:50:29 PDT 2024
https://github.com/royitaqi updated https://github.com/llvm/llvm-project/pull/90703
>From 0fd67e2de7e702ce6f7353845454ea7ff9f980d6 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Tue, 30 Apr 2024 21:35:49 -0700
Subject: [PATCH 01/11] Add SBCommandInterpreter::GetTranscript()
---
lldb/include/lldb/API/SBCommandInterpreter.h | 12 +++++++++---
lldb/source/API/SBCommandInterpreter.cpp | 7 ++++++-
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h
index ba2e049204b8e6..d65f06d676f91f 100644
--- a/lldb/include/lldb/API/SBCommandInterpreter.h
+++ b/lldb/include/lldb/API/SBCommandInterpreter.h
@@ -247,13 +247,13 @@ class SBCommandInterpreter {
lldb::SBStringList &matches,
lldb::SBStringList &descriptions);
- /// Returns whether an interrupt flag was raised either by the SBDebugger -
+ /// Returns whether an interrupt flag was raised either by the SBDebugger -
/// when the function is not running on the RunCommandInterpreter thread, or
/// by SBCommandInterpreter::InterruptCommand if it is. If your code is doing
- /// interruptible work, check this API periodically, and interrupt if it
+ /// interruptible work, check this API periodically, and interrupt if it
/// returns true.
bool WasInterrupted() const;
-
+
/// Interrupts the command currently executing in the RunCommandInterpreter
/// thread.
///
@@ -318,6 +318,12 @@ class SBCommandInterpreter {
SBStructuredData GetStatistics();
+ /// Returns a list of handled commands, output and error. Each element in
+ /// the list is a dictionary with three keys: "command" (string), "output"
+ /// (list of strings) and optionally "error" (list of strings). Each string
+ /// in "output" and "error" is a line (without EOL characteres).
+ SBStructuredData GetTranscript();
+
protected:
friend class lldb_private::CommandPluginInterfaceImplementation;
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index 83c0951c56db60..242b3f8f09c48a 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -150,7 +150,7 @@ bool SBCommandInterpreter::WasInterrupted() const {
bool SBCommandInterpreter::InterruptCommand() {
LLDB_INSTRUMENT_VA(this);
-
+
return (IsValid() ? m_opaque_ptr->InterruptCommand() : false);
}
@@ -571,6 +571,11 @@ SBStructuredData SBCommandInterpreter::GetStatistics() {
return data;
}
+SBStructuredData SBCommandInterpreter::GetTranscript() {
+ LLDB_INSTRUMENT_VA(this);
+ return SBStructuredData();
+}
+
lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
const char *help) {
LLDB_INSTRUMENT_VA(this, name, help);
>From a1c948ceabaccdc3407e0c4eae0ebc594a9b68b7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 13:45:47 -0700
Subject: [PATCH 02/11] Implement the new API
---
.../lldb/Interpreter/CommandInterpreter.h | 12 +++++--
lldb/include/lldb/Utility/StructuredData.h | 11 +++---
lldb/source/API/SBCommandInterpreter.cpp | 8 ++++-
.../source/Interpreter/CommandInterpreter.cpp | 21 ++++++++++-
lldb/source/Utility/StructuredData.cpp | 35 +++++++++++++++++++
5 files changed, 79 insertions(+), 8 deletions(-)
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 70a55a77465bfe..9474c41c0dcedd 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -22,6 +22,7 @@
#include "lldb/Utility/Log.h"
#include "lldb/Utility/StreamString.h"
#include "lldb/Utility/StringList.h"
+#include "lldb/Utility/StructuredData.h"
#include "lldb/lldb-forward.h"
#include "lldb/lldb-private.h"
@@ -241,7 +242,7 @@ class CommandInterpreter : public Broadcaster,
eCommandTypesAllThem = 0xFFFF //< all commands
};
- // The CommandAlias and CommandInterpreter both have a hand in
+ // The CommandAlias and CommandInterpreter both have a hand in
// substituting for alias commands. They work by writing special tokens
// in the template form of the Alias command, and then detecting them when the
// command is executed. These are the special tokens:
@@ -576,7 +577,7 @@ class CommandInterpreter : public Broadcaster,
void SetEchoCommentCommands(bool enable);
bool GetRepeatPreviousCommand() const;
-
+
bool GetRequireCommandOverwrite() const;
const CommandObject::CommandMap &GetUserCommands() const {
@@ -647,6 +648,7 @@ class CommandInterpreter : public Broadcaster,
}
llvm::json::Value GetStatistics();
+ StructuredData::ArraySP GetTranscript() const;
protected:
friend class Debugger;
@@ -766,6 +768,12 @@ class CommandInterpreter : public Broadcaster,
CommandUsageMap m_command_usages;
StreamString m_transcript_stream;
+
+ /// Contains a list of handled commands, output and error. Each element in
+ /// the list is a dictionary with three keys: "command" (string), "output"
+ /// (list of strings) and optionally "error" (list of strings). Each string
+ /// in "output" and "error" is a line (without EOL characteres).
+ StructuredData::ArraySP m_transcript_structured;
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 5e63ef92fac3ec..72fd035c23e47e 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,6 +290,9 @@ class StructuredData {
void GetDescription(lldb_private::Stream &s) const override;
+ static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit,
+ bool keepEmpty);
+
protected:
typedef std::vector<ObjectSP> collection;
collection m_items;
@@ -366,10 +369,10 @@ class StructuredData {
class String : public Object {
public:
String() : Object(lldb::eStructuredDataTypeString) {}
- explicit String(llvm::StringRef S)
- : Object(lldb::eStructuredDataTypeString), m_value(S) {}
+ explicit String(llvm::StringRef s)
+ : Object(lldb::eStructuredDataTypeString), m_value(s) {}
- void SetValue(llvm::StringRef S) { m_value = std::string(S); }
+ void SetValue(llvm::StringRef s) { m_value = std::string(s); }
llvm::StringRef GetValue() { return m_value; }
@@ -432,7 +435,7 @@ class StructuredData {
}
return success;
}
-
+
template <class IntType>
bool GetValueForKeyAsInteger(llvm::StringRef key, IntType &result) const {
ObjectSP value_sp = GetValueForKey(key);
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index 242b3f8f09c48a..e96b5a047c64d5 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -573,7 +573,13 @@ SBStructuredData SBCommandInterpreter::GetStatistics() {
SBStructuredData SBCommandInterpreter::GetTranscript() {
LLDB_INSTRUMENT_VA(this);
- return SBStructuredData();
+
+ SBStructuredData data;
+ if (!IsValid())
+ return data;
+
+ data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
+ return data;
}
lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 4c58ecc3c1848f..b5f726d3234655 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -51,6 +51,7 @@
#include "lldb/Utility/Log.h"
#include "lldb/Utility/State.h"
#include "lldb/Utility/Stream.h"
+#include "lldb/Utility/StructuredData.h"
#include "lldb/Utility/Timer.h"
#include "lldb/Host/Config.h"
@@ -135,7 +136,8 @@ CommandInterpreter::CommandInterpreter(Debugger &debugger,
m_skip_lldbinit_files(false), m_skip_app_init_files(false),
m_comment_char('#'), m_batch_command_mode(false),
m_truncation_warning(eNoOmission), m_max_depth_warning(eNoOmission),
- m_command_source_depth(0) {
+ m_command_source_depth(0),
+ m_transcript_structured(std::make_shared<StructuredData::Array>()) {
SetEventName(eBroadcastBitThreadShouldExit, "thread-should-exit");
SetEventName(eBroadcastBitResetPrompt, "reset-prompt");
SetEventName(eBroadcastBitQuitCommandReceived, "quit");
@@ -1891,6 +1893,10 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
m_transcript_stream << "(lldb) " << command_line << '\n';
+ auto transcript_item = std::make_shared<StructuredData::Dictionary>();
+ transcript_item->AddStringItem("command", command_line);
+ m_transcript_structured->AddItem(transcript_item);
+
bool empty_command = false;
bool comment_command = false;
if (command_string.empty())
@@ -2044,6 +2050,15 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
m_transcript_stream << result.GetOutputData();
m_transcript_stream << result.GetErrorData();
+ // Add output and error to the transcript item after splitting lines. In the
+ // future, other aspects of the command (e.g. perf) can be added, too.
+ transcript_item->AddItem(
+ "output", StructuredData::Array::SplitString(result.GetOutputData(), '\n',
+ -1, false));
+ transcript_item->AddItem(
+ "error", StructuredData::Array::SplitString(result.GetErrorData(), '\n',
+ -1, false));
+
return result.Succeeded();
}
@@ -3554,3 +3569,7 @@ llvm::json::Value CommandInterpreter::GetStatistics() {
stats.try_emplace(command_usage.getKey(), command_usage.getValue());
return stats;
}
+
+StructuredData::ArraySP CommandInterpreter::GetTranscript() const {
+ return m_transcript_structured;
+}
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7686d052c599c6..278ec93168926a 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -10,10 +10,13 @@
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/Status.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cerrno>
#include <cinttypes>
#include <cstdlib>
+#include <memory>
+#include <sstream>
using namespace lldb_private;
using namespace llvm;
@@ -289,3 +292,35 @@ void StructuredData::Null::GetDescription(lldb_private::Stream &s) const {
void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const {
s.Printf("%p", m_object);
}
+
+/// This is the same implementation as `StringRef::split`. Not depending on
+/// `StringRef::split` because it will involve a temporary `SmallVectorImpl`.
+StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
+ char separator,
+ int maxSplit,
+ bool keepEmpty) {
+ auto array_sp = std::make_shared<StructuredData::Array>();
+
+ // Count down from MaxSplit. When MaxSplit is -1, this will just split
+ // "forever". This doesn't support splitting more than 2^31 times
+ // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+ // but that seems unlikely to be useful.
+ while (maxSplit-- != 0) {
+ size_t idx = s.find(separator);
+ if (idx == llvm::StringLiteral::npos)
+ break;
+
+ // Push this split.
+ if (keepEmpty || idx > 0)
+ array_sp->AddStringItem(s.slice(0, idx));
+
+ // Jump forward.
+ s = s.slice(idx + 1, llvm::StringLiteral::npos);
+ }
+
+ // Push the tail.
+ if (keepEmpty || !s.empty())
+ array_sp->AddStringItem(s);
+
+ return array_sp;
+}
>From efc1c2037da00dacddc3e52812f93377d41d4f82 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 14:45:48 -0700
Subject: [PATCH 03/11] Add unittest
---
.../interpreter/TestCommandInterpreterAPI.py | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 8f9fbfc255bb02..93d36e3388941c 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -1,5 +1,6 @@
"""Test the SBCommandInterpreter APIs."""
+import json
import lldb
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
@@ -85,3 +86,44 @@ def test_command_output(self):
self.assertEqual(res.GetOutput(), "")
self.assertIsNotNone(res.GetError())
self.assertEqual(res.GetError(), "")
+
+ def test_structured_transcript(self):
+ """Test structured transcript generation and retrieval."""
+ ci = self.dbg.GetCommandInterpreter()
+ self.assertTrue(ci, VALID_COMMAND_INTERPRETER)
+
+ # Send a few commands through the command interpreter
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand("version", res)
+ ci.HandleCommand("an-unknown-command", res)
+
+ # Retrieve the transcript and convert it into a Python object
+ transcript = ci.GetTranscript()
+ self.assertTrue(transcript.IsValid())
+
+ stream = lldb.SBStream()
+ self.assertTrue(stream)
+
+ error = transcript.GetAsJSON(stream)
+ self.assertSuccess(error)
+
+ transcript = json.loads(stream.GetData())
+
+ # Validate the transcript.
+ #
+ # Notes:
+ # 1. The following asserts rely on the exact output format of the
+ # commands. Hopefully we are not changing them any time soon.
+ # 2. The transcript will contain a bunch of commands that are run
+ # automatically. We only want to validate for the ones that are
+ # handled in the above, hence the negative indices to find them.
+ self.assertEqual(transcript[-2]["command"], "version")
+ self.assertTrue("lldb version" in transcript[-2]["output"][0])
+ self.assertEqual(transcript[-1],
+ {
+ "command": "an-unknown-command",
+ "output": [],
+ "error": [
+ "error: 'an-unknown-command' is not a valid command.",
+ ],
+ })
>From 6d1190df0ecae0fa49519545526636e84ee9b394 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 15:20:38 -0700
Subject: [PATCH 04/11] Add more test asserts and some touch ups
---
.../lldb/Interpreter/CommandInterpreter.h | 2 +-
.../source/Interpreter/CommandInterpreter.cpp | 2 +
lldb/source/Utility/StructuredData.cpp | 2 -
.../interpreter/TestCommandInterpreterAPI.py | 63 ++++++++++++++++---
lldb/test/API/python_api/interpreter/main.c | 5 +-
5 files changed, 60 insertions(+), 14 deletions(-)
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 9474c41c0dcedd..c0846db8f2b8a2 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -772,7 +772,7 @@ class CommandInterpreter : public Broadcaster,
/// Contains a list of handled commands, output and error. Each element in
/// the list is a dictionary with three keys: "command" (string), "output"
/// (list of strings) and optionally "error" (list of strings). Each string
- /// in "output" and "error" is a line (without EOL characteres).
+ /// in "output" and "error" is a line (without EOL characters).
StructuredData::ArraySP m_transcript_structured;
};
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index b5f726d3234655..1ec1da437ba3ac 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -1893,6 +1893,8 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
m_transcript_stream << "(lldb) " << command_line << '\n';
+ // The same `transcript_item` will be used below to add output and error of
+ // the command.
auto transcript_item = std::make_shared<StructuredData::Dictionary>();
transcript_item->AddStringItem("command", command_line);
m_transcript_structured->AddItem(transcript_item);
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 278ec93168926a..7870334d708fe9 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -15,8 +15,6 @@
#include <cerrno>
#include <cinttypes>
#include <cstdlib>
-#include <memory>
-#include <sstream>
using namespace lldb_private;
using namespace llvm;
diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 93d36e3388941c..e5cb4a18f7df6f 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -89,6 +89,13 @@ def test_command_output(self):
def test_structured_transcript(self):
"""Test structured transcript generation and retrieval."""
+ # Get command interpreter and create a target
+ self.build()
+ exe = self.getBuildArtifact("a.out")
+
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+
ci = self.dbg.GetCommandInterpreter()
self.assertTrue(ci, VALID_COMMAND_INTERPRETER)
@@ -96,6 +103,10 @@ def test_structured_transcript(self):
res = lldb.SBCommandReturnObject()
ci.HandleCommand("version", res)
ci.HandleCommand("an-unknown-command", res)
+ ci.HandleCommand("breakpoint set -f main.c -l %d" % self.line, res)
+ ci.HandleCommand("r", res)
+ ci.HandleCommand("p a", res)
+ total_number_of_commands = 5
# Retrieve the transcript and convert it into a Python object
transcript = ci.GetTranscript()
@@ -109,17 +120,25 @@ def test_structured_transcript(self):
transcript = json.loads(stream.GetData())
+ # The transcript will contain a bunch of commands that are run
+ # automatically. We only want to validate for the ones that are
+ # listed above, hence trimming to the last parts.
+ transcript = transcript[-total_number_of_commands:]
+
+ print(transcript)
+
# Validate the transcript.
#
- # Notes:
- # 1. The following asserts rely on the exact output format of the
- # commands. Hopefully we are not changing them any time soon.
- # 2. The transcript will contain a bunch of commands that are run
- # automatically. We only want to validate for the ones that are
- # handled in the above, hence the negative indices to find them.
- self.assertEqual(transcript[-2]["command"], "version")
- self.assertTrue("lldb version" in transcript[-2]["output"][0])
- self.assertEqual(transcript[-1],
+ # The following asserts rely on the exact output format of the
+ # commands. Hopefully we are not changing them any time soon.
+
+ # (lldb) version
+ self.assertEqual(transcript[0]["command"], "version")
+ self.assertTrue("lldb version" in transcript[0]["output"][0])
+ self.assertEqual(transcript[0]["error"], [])
+
+ # (lldb) an-unknown-command
+ self.assertEqual(transcript[1],
{
"command": "an-unknown-command",
"output": [],
@@ -127,3 +146,29 @@ def test_structured_transcript(self):
"error: 'an-unknown-command' is not a valid command.",
],
})
+
+ # (lldb) breakpoint set -f main.c -l X
+ self.assertEqual(transcript[2],
+ {
+ "command": "breakpoint set -f main.c -l %d" % self.line,
+ "output": [
+ "Breakpoint 1: where = a.out`main + 29 at main.c:5:5, address = 0x0000000100000f7d",
+ ],
+ "error": [],
+ })
+
+ # (lldb) r
+ self.assertEqual(transcript[3]["command"], "r")
+ self.assertTrue("Process" in transcript[3]["output"][0])
+ self.assertTrue("launched" in transcript[3]["output"][0])
+ self.assertEqual(transcript[3]["error"], [])
+
+ # (lldb) p a
+ self.assertEqual(transcript[4],
+ {
+ "command": "p a",
+ "output": [
+ "(int) 123",
+ ],
+ "error": [],
+ })
diff --git a/lldb/test/API/python_api/interpreter/main.c b/lldb/test/API/python_api/interpreter/main.c
index 277aa54a4eea52..366ffde5fdef51 100644
--- a/lldb/test/API/python_api/interpreter/main.c
+++ b/lldb/test/API/python_api/interpreter/main.c
@@ -1,6 +1,7 @@
#include <stdio.h>
int main(int argc, char const *argv[]) {
- printf("Hello world.\n");
- return 0;
+ int a = 123;
+ printf("Hello world.\n");
+ return 0;
}
>From 26a726b2f94713ef8508049115ab93ee91e9a836 Mon Sep 17 00:00:00 2001
From: royitaqi <royitaqi at users.noreply.github.com>
Date: Wed, 1 May 2024 15:27:33 -0700
Subject: [PATCH 05/11] Apply suggestions from code review
Co-authored-by: Med Ismail Bennani <ismail at bennani.ma>
---
lldb/source/API/SBCommandInterpreter.cpp | 6 ++----
lldb/source/Utility/StructuredData.cpp | 4 ++--
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index e96b5a047c64d5..233a2f97fb9f15 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -575,10 +575,8 @@ SBStructuredData SBCommandInterpreter::GetTranscript() {
LLDB_INSTRUMENT_VA(this);
SBStructuredData data;
- if (!IsValid())
- return data;
-
- data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
+ if (IsValid())
+ data.m_impl_up->SetObjectSP(m_opaque_ptr->GetTranscript());
return data;
}
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7870334d708fe9..4ca804cb76a74b 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -299,9 +299,9 @@ StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
bool keepEmpty) {
auto array_sp = std::make_shared<StructuredData::Array>();
- // Count down from MaxSplit. When MaxSplit is -1, this will just split
+ // Count down from `maxSplit`. When `maxSplit` is -1, this will just split
// "forever". This doesn't support splitting more than 2^31 times
- // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+ // intentionally; if we ever want that we can make `maxSplit` a 64-bit integer
// but that seems unlikely to be useful.
while (maxSplit-- != 0) {
size_t idx = s.find(separator);
>From 52a310b8c236d252233b6e49de48a0c53eab9f45 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 16:07:44 -0700
Subject: [PATCH 06/11] Move and add document for Array::SplitString
---
lldb/include/lldb/Utility/StructuredData.h | 25 ++++++++++++++++++++--
lldb/source/Utility/StructuredData.cpp | 2 --
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 72fd035c23e47e..69db0caca20510 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,8 +290,29 @@ class StructuredData {
void GetDescription(lldb_private::Stream &s) const override;
- static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit,
- bool keepEmpty);
+ /// Creates an Array of substrings by splitting a string around the occurrences of a separator character.
+ ///
+ /// Note:
+ /// * This is almost the same API and implementation as `StringRef::split`.
+ /// * Not depending on `StringRef::split` because it will involve a
+ /// temporary `SmallVectorImpl`.
+ ///
+ /// \param[in] s
+ /// The input string.
+ ///
+ /// \param[in] separator
+ /// The character to split on.
+ ///
+ /// \param[in] maxSplit
+ /// The maximum number of times the string is split. If \a maxSplit is >= 0, at most \a maxSplit splits are done and consequently <= \a maxSplit + 1 elements are returned.
+ ///
+ /// \param[in] keepEmpty
+ /// True if empty substrings should be returned. Empty substrings still count when considering \a maxSplit.
+ ///
+ /// \return
+ /// An array containing the substrings. If \a maxSplit == -1 and \a keepEmpty == true, then the concatination of the array forms the input string.
+ static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit = -1,
+ bool keepEmpty = true);
protected:
typedef std::vector<ObjectSP> collection;
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 4ca804cb76a74b..7fa1063e5f01fa 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -291,8 +291,6 @@ void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const {
s.Printf("%p", m_object);
}
-/// This is the same implementation as `StringRef::split`. Not depending on
-/// `StringRef::split` because it will involve a temporary `SmallVectorImpl`.
StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
char separator,
int maxSplit,
>From 9beff0b2fdbac700f2aec6047ea90356ffecbce7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 16:53:25 -0700
Subject: [PATCH 07/11] Add unit test for Array::SplitString
---
lldb/unittests/Utility/StructuredDataTest.cpp | 63 +++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/lldb/unittests/Utility/StructuredDataTest.cpp b/lldb/unittests/Utility/StructuredDataTest.cpp
index e536039f365a4b..f1074909463343 100644
--- a/lldb/unittests/Utility/StructuredDataTest.cpp
+++ b/lldb/unittests/Utility/StructuredDataTest.cpp
@@ -12,6 +12,7 @@
#include "lldb/Utility/Status.h"
#include "lldb/Utility/StreamString.h"
#include "lldb/Utility/StructuredData.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
using namespace lldb;
@@ -112,3 +113,65 @@ TEST(StructuredDataTest, ParseJSONFromFile) {
object_sp->Dump(S, false);
EXPECT_EQ("[1,2,3]", S.GetString());
}
+
+struct ArraySplitStringTestCase {
+ llvm::StringRef s;
+ char separator;
+ int maxSplit;
+ bool keepEmpty;
+ std::vector<std::string> expected;
+};
+
+TEST(StructuredDataTest, ArraySplitString) {
+ ArraySplitStringTestCase test_cases[] = {
+ // Happy path
+ {
+ "1,2,,3",
+ ',',
+ -1,
+ true,
+ {"1", "2", "", "3"},
+ },
+ // No splits
+ {
+ "1,2,,3",
+ ',',
+ 0,
+ true,
+ {"1,2,,3"},
+ },
+ // 1 split
+ {
+ "1,2,,3",
+ ',',
+ 1,
+ true,
+ {"1", "2,,3"},
+ },
+ // No empty substrings
+ {
+ "1,2,,3",
+ ',',
+ -1,
+ false,
+ {"1", "2", "3"},
+ },
+ // Empty substrings count towards splits
+ {
+ ",1,2,3",
+ ',',
+ 1,
+ false,
+ {"1,2,3"},
+ },
+ };
+ for (const auto &test_case : test_cases) {
+ auto array = StructuredData::Array::SplitString(
+ test_case.s, test_case.separator, test_case.maxSplit,
+ test_case.keepEmpty);
+ EXPECT_EQ(test_case.expected.size(), array->GetSize());
+ for (unsigned int i = 0; i < test_case.expected.size(); ++i) {
+ EXPECT_EQ(test_case.expected[i], array->GetItemAtIndexAsString(i)->str());
+ }
+ }
+}
>From 6c6c5c272511f8c62c7ef14eefe67904875b4d2c Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 17:07:12 -0700
Subject: [PATCH 08/11] Fix format
---
lldb/include/lldb/Utility/StructuredData.h | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 69db0caca20510..8217d2bf33b808 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -290,7 +290,8 @@ class StructuredData {
void GetDescription(lldb_private::Stream &s) const override;
- /// Creates an Array of substrings by splitting a string around the occurrences of a separator character.
+ /// Creates an Array of substrings by splitting a string around the
+ /// occurrences of a separator character.
///
/// Note:
/// * This is almost the same API and implementation as `StringRef::split`.
@@ -304,15 +305,20 @@ class StructuredData {
/// The character to split on.
///
/// \param[in] maxSplit
- /// The maximum number of times the string is split. If \a maxSplit is >= 0, at most \a maxSplit splits are done and consequently <= \a maxSplit + 1 elements are returned.
+ /// The maximum number of times the string is split. If \a maxSplit is >=
+ /// 0, at most \a maxSplit splits are done and consequently <= \a maxSplit
+ /// + 1 elements are returned.
///
/// \param[in] keepEmpty
- /// True if empty substrings should be returned. Empty substrings still count when considering \a maxSplit.
+ /// True if empty substrings should be returned. Empty substrings still
+ /// count when considering \a maxSplit.
///
/// \return
- /// An array containing the substrings. If \a maxSplit == -1 and \a keepEmpty == true, then the concatination of the array forms the input string.
- static ArraySP SplitString(llvm::StringRef s, char separator, int maxSplit = -1,
- bool keepEmpty = true);
+ /// An array containing the substrings. If \a maxSplit == -1 and \a
+ /// keepEmpty == true, then the concatination of the array forms the input
+ /// string.
+ static ArraySP SplitString(llvm::StringRef s, char separator,
+ int maxSplit = -1, bool keepEmpty = true);
protected:
typedef std::vector<ObjectSP> collection;
>From fbf8e9ab4cfe232218edb1eb4fe0c22d9a68f4a9 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 1 May 2024 17:23:14 -0700
Subject: [PATCH 09/11] Improve Python API test reliability
---
.../interpreter/TestCommandInterpreterAPI.py | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index e5cb4a18f7df6f..5bb9f579ad13f5 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -125,8 +125,6 @@ def test_structured_transcript(self):
# listed above, hence trimming to the last parts.
transcript = transcript[-total_number_of_commands:]
- print(transcript)
-
# Validate the transcript.
#
# The following asserts rely on the exact output format of the
@@ -147,18 +145,15 @@ def test_structured_transcript(self):
],
})
- # (lldb) breakpoint set -f main.c -l X
- self.assertEqual(transcript[2],
- {
- "command": "breakpoint set -f main.c -l %d" % self.line,
- "output": [
- "Breakpoint 1: where = a.out`main + 29 at main.c:5:5, address = 0x0000000100000f7d",
- ],
- "error": [],
- })
+ # (lldb) breakpoint set -f main.c -l <line>
+ self.assertEqual(transcript[2]["command"], "breakpoint set -f main.c -l %d" % self.line)
+ # Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address = 0x0000000100000f7d
+ self.assertTrue("Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address =" in transcript[2]["output"][0])
+ self.assertEqual(transcript[2]["error"], [])
# (lldb) r
self.assertEqual(transcript[3]["command"], "r")
+ # Process 25494 launched: '<path>/TestCommandInterpreterAPI.test_structured_transcript/a.out' (x86_64)
self.assertTrue("Process" in transcript[3]["output"][0])
self.assertTrue("launched" in transcript[3]["output"][0])
self.assertEqual(transcript[3]["error"], [])
>From 836a719f0f2124627ebb0ea625486bcc8cdf3cd5 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 2 May 2024 21:08:46 -0700
Subject: [PATCH 10/11] Update implementation of Array::SplitString, and other
smaller changes
---
lldb/include/lldb/API/SBCommandInterpreter.h | 2 +-
.../lldb/Interpreter/CommandInterpreter.h | 2 +-
lldb/include/lldb/Utility/StructuredData.h | 11 ++------
.../source/Interpreter/CommandInterpreter.cpp | 6 ++--
lldb/source/Utility/StructuredData.cpp | 28 +++++--------------
5 files changed, 15 insertions(+), 34 deletions(-)
diff --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h
index d65f06d676f91f..f56f7d844b0d13 100644
--- a/lldb/include/lldb/API/SBCommandInterpreter.h
+++ b/lldb/include/lldb/API/SBCommandInterpreter.h
@@ -321,7 +321,7 @@ class SBCommandInterpreter {
/// Returns a list of handled commands, output and error. Each element in
/// the list is a dictionary with three keys: "command" (string), "output"
/// (list of strings) and optionally "error" (list of strings). Each string
- /// in "output" and "error" is a line (without EOL characteres).
+ /// in "output" and "error" is a line (without EOL characters).
SBStructuredData GetTranscript();
protected:
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index c0846db8f2b8a2..0938ad6ae78ab4 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -773,7 +773,7 @@ class CommandInterpreter : public Broadcaster,
/// the list is a dictionary with three keys: "command" (string), "output"
/// (list of strings) and optionally "error" (list of strings). Each string
/// in "output" and "error" is a line (without EOL characters).
- StructuredData::ArraySP m_transcript_structured;
+ StructuredData::ArraySP m_transcript;
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 8217d2bf33b808..563eb1b1a284af 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -293,11 +293,6 @@ class StructuredData {
/// Creates an Array of substrings by splitting a string around the
/// occurrences of a separator character.
///
- /// Note:
- /// * This is almost the same API and implementation as `StringRef::split`.
- /// * Not depending on `StringRef::split` because it will involve a
- /// temporary `SmallVectorImpl`.
- ///
/// \param[in] s
/// The input string.
///
@@ -396,10 +391,10 @@ class StructuredData {
class String : public Object {
public:
String() : Object(lldb::eStructuredDataTypeString) {}
- explicit String(llvm::StringRef s)
- : Object(lldb::eStructuredDataTypeString), m_value(s) {}
+ explicit String(llvm::StringRef S)
+ : Object(lldb::eStructuredDataTypeString), m_value(S) {}
- void SetValue(llvm::StringRef s) { m_value = std::string(s); }
+ void SetValue(llvm::StringRef S) { m_value = std::string(S); }
llvm::StringRef GetValue() { return m_value; }
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 1ec1da437ba3ac..d5e37dee186a62 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -137,7 +137,7 @@ CommandInterpreter::CommandInterpreter(Debugger &debugger,
m_comment_char('#'), m_batch_command_mode(false),
m_truncation_warning(eNoOmission), m_max_depth_warning(eNoOmission),
m_command_source_depth(0),
- m_transcript_structured(std::make_shared<StructuredData::Array>()) {
+ m_transcript(std::make_shared<StructuredData::Array>()) {
SetEventName(eBroadcastBitThreadShouldExit, "thread-should-exit");
SetEventName(eBroadcastBitResetPrompt, "reset-prompt");
SetEventName(eBroadcastBitQuitCommandReceived, "quit");
@@ -1897,7 +1897,7 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
// the command.
auto transcript_item = std::make_shared<StructuredData::Dictionary>();
transcript_item->AddStringItem("command", command_line);
- m_transcript_structured->AddItem(transcript_item);
+ m_transcript->AddItem(transcript_item);
bool empty_command = false;
bool comment_command = false;
@@ -3573,5 +3573,5 @@ llvm::json::Value CommandInterpreter::GetStatistics() {
}
StructuredData::ArraySP CommandInterpreter::GetTranscript() const {
- return m_transcript_structured;
+ return m_transcript;
}
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index 7fa1063e5f01fa..e5f9a69fab2ab6 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -295,28 +295,14 @@ StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
char separator,
int maxSplit,
bool keepEmpty) {
- auto array_sp = std::make_shared<StructuredData::Array>();
+ // Split the string into a small vector.
+ llvm::SmallVector<StringRef> small_vec;
+ s.split(small_vec, separator, maxSplit, keepEmpty);
- // Count down from `maxSplit`. When `maxSplit` is -1, this will just split
- // "forever". This doesn't support splitting more than 2^31 times
- // intentionally; if we ever want that we can make `maxSplit` a 64-bit integer
- // but that seems unlikely to be useful.
- while (maxSplit-- != 0) {
- size_t idx = s.find(separator);
- if (idx == llvm::StringLiteral::npos)
- break;
-
- // Push this split.
- if (keepEmpty || idx > 0)
- array_sp->AddStringItem(s.slice(0, idx));
-
- // Jump forward.
- s = s.slice(idx + 1, llvm::StringLiteral::npos);
+ // Copy the substrings from the small vector into the output array.
+ auto array_sp = std::make_shared<StructuredData::Array>();
+ for (auto substring : small_vec) {
+ array_sp->AddStringItem(std::move(substring));
}
-
- // Push the tail.
- if (keepEmpty || !s.empty())
- array_sp->AddStringItem(s);
-
return array_sp;
}
>From 6f82462d7dff7c5d97c58a84b4eac9aca8d7fc99 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Mon, 6 May 2024 10:47:47 -0700
Subject: [PATCH 11/11] Update comment, more reliable test, minor fix
---
lldb/source/Utility/StructuredData.cpp | 2 +-
.../interpreter/TestCommandInterpreterAPI.py | 15 +++++++++++----
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index e5f9a69fab2ab6..f4e87b7167a1f8 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -302,7 +302,7 @@ StructuredData::ArraySP StructuredData::Array::SplitString(llvm::StringRef s,
// Copy the substrings from the small vector into the output array.
auto array_sp = std::make_shared<StructuredData::Array>();
for (auto substring : small_vec) {
- array_sp->AddStringItem(std::move(substring));
+ array_sp->AddStringItem(substring);
}
return array_sp;
}
diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
index 5bb9f579ad13f5..54179892eabda9 100644
--- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
+++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
@@ -120,9 +120,16 @@ def test_structured_transcript(self):
transcript = json.loads(stream.GetData())
- # The transcript will contain a bunch of commands that are run
- # automatically. We only want to validate for the ones that are
- # listed above, hence trimming to the last parts.
+ print('TRANSCRIPT')
+ print(transcript)
+
+ # The transcript will contain a bunch of commands that are from
+ # a general setup code. See `def setUpCommands(cls)` in
+ # `lldb/packages/Python/lldbsuite/test/lldbtest.py`.
+ # https://shorturl.at/bJKVW
+ #
+ # We only want to validate for the ones that are listed above, hence
+ # trimming to the last parts.
transcript = transcript[-total_number_of_commands:]
# Validate the transcript.
@@ -148,7 +155,7 @@ def test_structured_transcript(self):
# (lldb) breakpoint set -f main.c -l <line>
self.assertEqual(transcript[2]["command"], "breakpoint set -f main.c -l %d" % self.line)
# Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address = 0x0000000100000f7d
- self.assertTrue("Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address =" in transcript[2]["output"][0])
+ self.assertTrue("Breakpoint 1: where = a.out`main " in transcript[2]["output"][0])
self.assertEqual(transcript[2]["error"], [])
# (lldb) r
More information about the lldb-commits
mailing list