[Lldb-commits] [lldb] [lldb-dap] Validate utf8 protocol messages. (PR #181261)
via lldb-commits
lldb-commits at lists.llvm.org
Thu Feb 12 14:55:27 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: John Harrison (ashgti)
<details>
<summary>Changes</summary>
I ran into this while debugging a different issue, but when we calcualte the 'value' field of a variable we were not ensuring the contents were valid utf8. If assertions are enabled then llvm::json::Value will assert that the string contains invalid utf8.
To address this I added a wrapper type (`lldb_dap::protocol::SanitizedString`) that can be used as a thin wrapper around `std::string` to ensure a field contains valid utf8.
I've used it in a handful of places that I believe could contain invalid utf8 output, but we can add it to other places as well in the protocol types.
---
Full diff: https://github.com/llvm/llvm-project/pull/181261.diff
7 Files Affected:
- (modified) lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py (+33-13)
- (modified) lldb/test/API/tools/lldb-dap/variables/main.cpp (+4)
- (modified) lldb/tools/lldb-dap/DAP.cpp (+4-3)
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp (+17)
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.h (+19)
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+1-1)
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolTypes.h (+2-1)
``````````diff
diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
index 1dbb0143e7a55..f737d41eaecec 100644
--- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
+++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
@@ -188,6 +188,8 @@ def do_test_scopes_variables_setVariable_evaluate(
},
"readOnly": True,
},
+ "valid_str": {},
+ "malformed_str": {},
"x": {"equals": {"type": "int"}},
}
@@ -340,9 +342,9 @@ def do_test_scopes_variables_setVariable_evaluate(
verify_locals["argc"]["equals"]["value"] = "123"
verify_locals["pt"]["children"]["x"]["equals"]["value"] = "111"
- verify_locals["x @ main.cpp:19"] = {"equals": {"type": "int", "value": "89"}}
- verify_locals["x @ main.cpp:21"] = {"equals": {"type": "int", "value": "42"}}
- verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": "72"}}
+ verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": "89"}}
+ verify_locals["x @ main.cpp:25"] = {"equals": {"type": "int", "value": "42"}}
+ verify_locals["x @ main.cpp:27"] = {"equals": {"type": "int", "value": "72"}}
self.verify_variables(verify_locals, self.dap_server.get_local_variables())
@@ -350,22 +352,22 @@ def do_test_scopes_variables_setVariable_evaluate(
self.assertFalse(self.set_local("x2", 9)["success"])
self.assertFalse(self.set_local("x @ main.cpp:0", 9)["success"])
- self.assertTrue(self.set_local("x @ main.cpp:19", 19)["success"])
- self.assertTrue(self.set_local("x @ main.cpp:21", 21)["success"])
- self.assertTrue(self.set_local("x @ main.cpp:23", 23)["success"])
+ self.assertTrue(self.set_local("x @ main.cpp:23", 19)["success"])
+ self.assertTrue(self.set_local("x @ main.cpp:25", 21)["success"])
+ self.assertTrue(self.set_local("x @ main.cpp:27", 23)["success"])
# The following should have no effect
- self.assertFalse(self.set_local("x @ main.cpp:23", "invalid")["success"])
+ self.assertFalse(self.set_local("x @ main.cpp:27", "invalid")["success"])
- verify_locals["x @ main.cpp:19"]["equals"]["value"] = "19"
- verify_locals["x @ main.cpp:21"]["equals"]["value"] = "21"
- verify_locals["x @ main.cpp:23"]["equals"]["value"] = "23"
+ verify_locals["x @ main.cpp:23"]["equals"]["value"] = "19"
+ verify_locals["x @ main.cpp:25"]["equals"]["value"] = "21"
+ verify_locals["x @ main.cpp:27"]["equals"]["value"] = "23"
self.verify_variables(verify_locals, self.dap_server.get_local_variables())
# The plain x variable shold refer to the innermost x
self.assertTrue(self.set_local("x", 22)["success"])
- verify_locals["x @ main.cpp:23"]["equals"]["value"] = "22"
+ verify_locals["x @ main.cpp:27"]["equals"]["value"] = "22"
self.verify_variables(verify_locals, self.dap_server.get_local_variables())
@@ -382,9 +384,9 @@ def do_test_scopes_variables_setVariable_evaluate(
names = [var["name"] for var in locals]
# The first shadowed x shouldn't have a suffix anymore
verify_locals["x"] = {"equals": {"type": "int", "value": "19"}}
- self.assertNotIn("x @ main.cpp:19", names)
- self.assertNotIn("x @ main.cpp:21", names)
self.assertNotIn("x @ main.cpp:23", names)
+ self.assertNotIn("x @ main.cpp:25", names)
+ self.assertNotIn("x @ main.cpp:27", names)
self.verify_variables(verify_locals, locals)
@@ -455,6 +457,22 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
},
"readOnly": True,
},
+ "valid_str": {
+ "equals": {
+ "type": "const char *",
+ },
+ "matches": {
+ "value": re.compile(r'0x\w+ "πΆπ°LπΎπ CππΌπ΄π"'),
+ },
+ },
+ "malformed_str": {
+ "equals": {
+ "type": "const char *",
+ },
+ "matches": {
+ "value": re.compile(r'0x\w+ "lone trailing \\x81\\x82 bytes"'),
+ },
+ },
"x": {
"equals": {"type": "int"},
"missing": ["indexedVariables"],
@@ -678,6 +696,8 @@ def test_return_variables(self):
"argc": {},
"argv": {},
"pt": {"readOnly": True},
+ "valid_str": {},
+ "malformed_str": {},
"x": {},
"return_result": {"equals": {"type": "int"}},
}
diff --git a/lldb/test/API/tools/lldb-dap/variables/main.cpp b/lldb/test/API/tools/lldb-dap/variables/main.cpp
index 0e363001f2f42..04fc62f02c22f 100644
--- a/lldb/test/API/tools/lldb-dap/variables/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/variables/main.cpp
@@ -5,6 +5,7 @@ struct PointType {
int y;
int buffer[BUFFER_SIZE];
};
+#include <cstdio>
#include <vector>
int g_global = 123;
static int s_global = 234;
@@ -16,6 +17,9 @@ int main(int argc, char const *argv[]) {
PointType pt = {11, 22, {0}};
for (int i = 0; i < BUFFER_SIZE; ++i)
pt.buffer[i] = i;
+ const char *valid_str = "πΆπ°LπΎπ CππΌπ΄π";
+ const char *malformed_str = "lone trailing \x81\x82 bytes";
+ printf("print malformed utf8 %s %s\n", valid_str, malformed_str);
int x = s_global - g_global - pt.y; // breakpoint 1
{
int x = 42;
diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp
index b76b05c5d1459..a629453dd5347 100644
--- a/lldb/tools/lldb-dap/DAP.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -411,9 +411,10 @@ void DAP::SendOutput(OutputType o, const llvm::StringRef output) {
if (end == llvm::StringRef::npos)
end = output.size() - 1;
llvm::json::Object event(CreateEventObject("output"));
- llvm::json::Object body;
- body.try_emplace("category", category);
- EmplaceSafeString(body, "output", output.slice(idx, end + 1).str());
+ llvm::json::Object body{
+ {"category", category},
+ {"output", protocol::SanitizedString(output.slice(idx, end + 1).str())},
+ };
event.try_emplace("body", std::move(body));
SendJSON(llvm::json::Value(std::move(event)));
idx = end + 1;
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
index 72359214c8537..a9a929f9b88d2 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
@@ -57,6 +57,23 @@ bool fromJSON(const json::Value &Params, MessageType &M, json::Path P) {
return true;
}
+json::Value toJSON(const SanitizedString &S) {
+ if (LLVM_LIKELY(llvm::json::isUTF8(std::string(S))))
+ return std::string(S);
+ llvm::errs() << "Here!\n";
+ return llvm::json::fixUTF8(std::string(S));
+}
+
+bool fromJSON(const llvm::json::Value &Param, SanitizedString &Str,
+ llvm::json::Path Path) {
+ if (auto s = Param.getAsString()) {
+ Str = *s;
+ return true;
+ }
+ Path.report("expected string");
+ return false;
+}
+
json::Value toJSON(const Request &R) {
assert(R.seq != kCalculateSeq && "invalid seq");
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
index 09ce6802b17c0..3053312c4660f 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
@@ -20,6 +20,7 @@
#ifndef LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H
#define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/JSON.h"
#include <cstdint>
#include <optional>
@@ -37,6 +38,24 @@ using Id = uint64_t;
/// the current session.
static constexpr Id kCalculateSeq = UINT64_MAX;
+/// A wrapper around a string to ensure the contents are sanitized as utf8
+/// during serialization. This value should be used for any strings that may
+/// contain raw data like variable values.
+class SanitizedString {
+public:
+ SanitizedString(std::string str) : m_str(str) {}
+ SanitizedString(llvm::StringRef str) : m_str(str.str()) {}
+ SanitizedString(const char *str) : m_str(str) {}
+ SanitizedString() = default;
+
+ operator std::string() const { return m_str; }
+
+private:
+ std::string m_str;
+};
+llvm::json::Value toJSON(const SanitizedString &s);
+bool fromJSON(const llvm::json::Value &, SanitizedString &, llvm::json::Path);
+
/// A client or debug adapter initiated request.
struct Request {
/// The command to execute.
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 28c9f48200e0c..af50e77ce6ab2 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -452,7 +452,7 @@ bool fromJSON(const llvm::json::Value &, SetVariableArguments &,
/// Response to `setVariable` request.
struct SetVariableResponseBody {
/// The new value of the variable.
- std::string value;
+ SanitizedString value;
/// The type of the new value. Typically shown in the UI when hovering over
/// the value.
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
index 71046d24c9787..e627b02e37bd1 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
@@ -21,6 +21,7 @@
#define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_TYPES_H
#include "Protocol/DAPTypes.h"
+#include "Protocol/ProtocolBase.h"
#include "lldb/lldb-defines.h"
#include "lldb/lldb-types.h"
#include "llvm/ADT/DenseSet.h"
@@ -942,7 +943,7 @@ struct Variable {
/// its children are not yet visible.
///
/// An empty string can be used if no value should be shown in the UI.
- std::string value;
+ SanitizedString value;
/// The type of the variable's value. Typically shown in the UI when hovering
/// over the value.
``````````
</details>
https://github.com/llvm/llvm-project/pull/181261
More information about the lldb-commits
mailing list