[llvm-branch-commits] [lldb] release/22.x: [lldb-dap] Fix Completions Request crash (#176211) (PR #176796)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 19 10:19:25 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport 3ca7a729901851f4a6f83e9783ee393cca46fd12
Requested by: @<!-- -->da-viper
---
Patch is 21.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176796.diff
9 Files Affected:
- (modified) lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py (+6-1)
- (modified) lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py (+37-25)
- (modified) lldb/test/API/tools/lldb-dap/completions/main.cpp (+1)
- (modified) lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp (+99-46)
- (modified) lldb/tools/lldb-dap/LLDBUtils.cpp (+35)
- (modified) lldb/tools/lldb-dap/LLDBUtils.h (+18)
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+3-3)
- (modified) lldb/unittests/DAP/LLDBUtilsTest.cpp (+28)
- (modified) lldb/unittests/DAP/ProtocolTypesTest.cpp (+3-3)
``````````diff
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 6a98bb50c9036..a79d766118b9d 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -1393,7 +1393,12 @@ def request_compileUnits(self, moduleId):
return response
def request_completions(self, text, frameId=None):
- args_dict = {"text": text, "column": len(text) + 1}
+ def code_units(input: str) -> int:
+ utf16_bytes = input.encode("utf-16-le")
+ # one UTF16 codeunit = 2 bytes.
+ return len(utf16_bytes) // 2
+
+ args_dict = {"text": text, "column": code_units(text) + 1}
if frameId:
args_dict["frameId"] = frameId
command_dict = {
diff --git a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
index 0ebecf6872a7d..1792ff9953efe 100644
--- a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
+++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
@@ -5,31 +5,32 @@
import lldbdap_testcase
import dap_server
from lldbsuite.test import lldbutil
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import skipIf
+from lldbsuite.test.lldbtest import line_number
session_completion = {
"text": "session",
- "label": "session -- Commands controlling LLDB session.",
+ "label": "session",
+ "detail": "Commands controlling LLDB session.",
}
settings_completion = {
"text": "settings",
- "label": "settings -- Commands for managing LLDB settings.",
+ "label": "settings",
+ "detail": "Commands for managing LLDB settings.",
}
memory_completion = {
"text": "memory",
- "label": "memory -- Commands for operating on memory in the current target process.",
+ "label": "memory",
+ "detail": "Commands for operating on memory in the current target process.",
}
command_var_completion = {
"text": "var",
- "label": "var -- Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
+ "label": "var",
+ "detail": "Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
}
-variable_var_completion = {
- "text": "var",
- "label": "var -- vector<baz> &",
-}
-variable_var1_completion = {"text": "var1", "label": "var1 -- int &"}
-variable_var2_completion = {"text": "var2", "label": "var2 -- int &"}
+variable_var_completion = {"text": "var", "label": "var", "detail": "vector<baz> &"}
+variable_var1_completion = {"text": "var1", "label": "var1", "detail": "int &"}
+variable_var2_completion = {"text": "var2", "label": "var2", "detail": "int &"}
# Older version of libcxx produce slightly different typename strings for
@@ -79,11 +80,13 @@ def test_command_completions(self):
[
{
"text": "read",
- "label": "read -- Read from the memory of the current target process.",
+ "label": "read",
+ "detail": "Read from the memory of the current target process.",
},
{
"text": "region",
- "label": "region -- Get information on the memory region containing an address in the current target process.",
+ "label": "region",
+ "detail": "Get information on the memory region containing an address in the current target process.",
},
],
)
@@ -110,7 +113,8 @@ def test_command_completions(self):
[
{
"text": "set",
- "label": "set -- Set the value of the specified debugger setting.",
+ "label": "set",
+ "detail": "Set the value of the specified debugger setting.",
}
],
)
@@ -167,7 +171,7 @@ def test_variable_completions(self):
self.verify_completions(
self.dap_server.get_completions("str"),
[{"text": "struct", "label": "struct"}],
- [{"text": "str1", "label": "str1 -- std::string &"}],
+ [{"text": "str1", "label": "str1", "detail": "std::string &"}],
)
self.continue_to_next_stop()
@@ -189,42 +193,46 @@ def test_variable_completions(self):
self.dap_server.get_completions("str"),
[
{"text": "struct", "label": "struct"},
- {"text": "str1", "label": "str1 -- std::string &"},
+ {"text": "str1", "label": "str1", "detail": "std::string &"},
],
)
+ self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+ # Test utf8 after ascii.
+ self.dap_server.get_completions("mƒ")
+
# Completion also works for more complex expressions
self.verify_completions(
self.dap_server.get_completions("foo1.v"),
- [{"text": "var1", "label": "foo1.var1 -- int"}],
+ [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
)
self.verify_completions(
self.dap_server.get_completions("foo1.my_bar_object.v"),
- [{"text": "var1", "label": "foo1.my_bar_object.var1 -- int"}],
+ [{"text": "var1", "label": "foo1.my_bar_object.var1", "detail": "int"}],
)
self.verify_completions(
self.dap_server.get_completions("foo1.var1 + foo1.v"),
- [{"text": "var1", "label": "foo1.var1 -- int"}],
+ [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
)
self.verify_completions(
self.dap_server.get_completions("foo1.var1 + v"),
- [{"text": "var1", "label": "var1 -- int &"}],
+ [{"text": "var1", "label": "var1", "detail": "int &"}],
)
# should correctly handle spaces between objects and member operators
self.verify_completions(
self.dap_server.get_completions("foo1 .v"),
- [{"text": "var1", "label": ".var1 -- int"}],
- [{"text": "var2", "label": ".var2 -- int"}],
+ [{"text": "var1", "label": ".var1", "detail": "int"}],
+ [{"text": "var2", "label": ".var2", "detail": "int"}],
)
self.verify_completions(
self.dap_server.get_completions("foo1 . v"),
- [{"text": "var1", "label": "var1 -- int"}],
- [{"text": "var2", "label": "var2 -- int"}],
+ [{"text": "var1", "label": "var1", "detail": "int"}],
+ [{"text": "var2", "label": "var2", "detail": "int"}],
)
# Even in variable mode, we can still use the escape prefix
@@ -273,6 +281,10 @@ def test_auto_completions(self):
],
)
+ # TODO: Note we are not checking the result because the `expression --` command adds an extra character
+ # for non ascii variables.
+ self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+
self.continue_to_exit()
console_str = self.get_console()
# we check in console to avoid waiting for output event.
diff --git a/lldb/test/API/tools/lldb-dap/completions/main.cpp b/lldb/test/API/tools/lldb-dap/completions/main.cpp
index 4314067cfe951..a12ce18201d97 100644
--- a/lldb/test/API/tools/lldb-dap/completions/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/completions/main.cpp
@@ -29,6 +29,7 @@ int main(int argc, char const *argv[]) {
fun(vec);
bar bar1 = {2};
bar *bar2 = &bar1;
+ int ƒake_f = 200;
foo foo1 = {3, &bar1, bar1, NULL};
return 0; // breakpoint 2
}
diff --git a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
index de9a15dcb73f4..9a3973190f7e7 100644
--- a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
@@ -7,17 +7,70 @@
//===----------------------------------------------------------------------===//
#include "DAP.h"
-#include "JSONUtils.h"
+#include "LLDBUtils.h"
#include "Protocol/ProtocolRequests.h"
#include "Protocol/ProtocolTypes.h"
#include "RequestHandler.h"
#include "lldb/API/SBStringList.h"
+#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace lldb_dap;
+using namespace lldb;
using namespace lldb_dap::protocol;
namespace lldb_dap {
+/// Gets the position in the UTF8 string where the specified line started.
+static size_t GetLineStartPos(StringRef text, uint32_t line) {
+ if (line == 0) // Invalid line.
+ return StringRef::npos;
+
+ if (line == 1)
+ return 0;
+
+ uint32_t cur_line = 1;
+ size_t pos = 0;
+
+ while (cur_line < line) {
+ const size_t new_line_pos = text.find('\n', pos);
+
+ if (new_line_pos == StringRef::npos)
+ return new_line_pos;
+
+ pos = new_line_pos + 1;
+ // text may end with a new line
+ if (pos >= text.size())
+ return StringRef::npos;
+
+ cur_line++;
+ }
+
+ assert(pos < text.size());
+ return pos;
+}
+
+static std::optional<size_t> GetCursorPos(StringRef text, uint32_t line,
+ uint32_t utf16_codeunits) {
+ if (text.empty())
+ return std::nullopt;
+
+ const size_t line_start_pos = GetLineStartPos(text, line);
+ if (line_start_pos == StringRef::npos)
+ return std::nullopt;
+
+ const StringRef completion_line =
+ text.substr(line_start_pos, text.find('\n', line_start_pos));
+ if (completion_line.empty())
+ return std::nullopt;
+
+ const std::optional<size_t> cursor_pos_opt =
+ UTF16CodeunitToBytes(completion_line, utf16_codeunits);
+ if (!cursor_pos_opt)
+ return std::nullopt;
+
+ const size_t cursor_pos = line_start_pos + *cursor_pos_opt;
+ return cursor_pos;
+}
/// Returns a list of possible completions for a given caret position and text.
///
@@ -25,85 +78,85 @@ namespace lldb_dap {
/// `supportsCompletionsRequest` is true.
Expected<CompletionsResponseBody>
CompletionsRequestHandler::Run(const CompletionsArguments &args) const {
+ std::string text = args.text;
+ const uint32_t line = args.line;
+ // column starts at 1.
+ const uint32_t utf16_codeunits = args.column - 1;
+
+ const auto cursor_pos_opt = GetCursorPos(text, line, utf16_codeunits);
+ if (!cursor_pos_opt)
+ return CompletionsResponseBody{};
+
+ size_t cursor_pos = *cursor_pos_opt;
+
// If we have a frame, try to set the context for variable completions.
lldb::SBFrame frame = dap.GetLLDBFrame(args.frameId);
if (frame.IsValid()) {
- frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread());
- frame.GetThread().SetSelectedFrame(frame.GetFrameID());
- }
-
- std::string text = args.text;
- auto original_column = args.column;
- auto original_line = args.line;
- auto offset = original_column - 1;
- if (original_line > 1) {
- SmallVector<StringRef, 2> lines;
- StringRef(text).split(lines, '\n');
- for (int i = 0; i < original_line - 1; i++) {
- offset += lines[i].size();
- }
+ lldb::SBThread frame_thread = frame.GetThread();
+ frame_thread.GetProcess().SetSelectedThread(frame_thread);
+ frame_thread.SetSelectedFrame(frame.GetFrameID());
}
- std::vector<CompletionItem> targets;
-
- bool had_escape_prefix =
- StringRef(text).starts_with(dap.configuration.commandEscapePrefix);
- ReplMode completion_mode = dap.DetectReplMode(frame, text, true);
-
- // Handle the offset change introduced by stripping out the
+ const StringRef escape_prefix = dap.configuration.commandEscapePrefix;
+ const bool had_escape_prefix = StringRef(text).starts_with(escape_prefix);
+ const ReplMode repl_mode = dap.DetectReplMode(frame, text, true);
+ // Handle the cursor_pos change introduced by stripping out the
// `command_escape_prefix`.
if (had_escape_prefix) {
- if (offset <
- static_cast<int64_t>(dap.configuration.commandEscapePrefix.size())) {
- return CompletionsResponseBody{std::move(targets)};
- }
- offset -= dap.configuration.commandEscapePrefix.size();
+ if (cursor_pos < escape_prefix.size())
+ return CompletionsResponseBody{};
+
+ cursor_pos -= escape_prefix.size();
}
// While the user is typing then we likely have an incomplete input and cannot
// reliably determine the precise intent (command vs variable), try completing
// the text as both a command and variable expression, if applicable.
const std::string expr_prefix = "expression -- ";
- std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
- {std::make_tuple(ReplMode::Command, text, offset),
+ const std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
+ {std::make_tuple(ReplMode::Command, text, cursor_pos),
std::make_tuple(ReplMode::Variable, expr_prefix + text,
- offset + expr_prefix.size())}};
+ cursor_pos + expr_prefix.size())}};
+
+ CompletionsResponseBody response;
+ std::vector<CompletionItem> &targets = response.targets;
+ lldb::SBCommandInterpreter interpreter = dap.debugger.GetCommandInterpreter();
for (const auto &[mode, line, cursor] : exprs) {
- if (completion_mode != ReplMode::Auto && completion_mode != mode)
+ if (repl_mode != ReplMode::Auto && repl_mode != mode)
continue;
lldb::SBStringList matches;
lldb::SBStringList descriptions;
- if (!dap.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions(
- line.c_str(), cursor, 0, 100, matches, descriptions))
+ if (!interpreter.HandleCompletionWithDescriptions(
+ line.c_str(), cursor, 0, 50, matches, descriptions))
continue;
// The first element is the common substring after the cursor position for
// all the matches. The rest of the elements are the matches so ignore the
// first result.
- for (size_t i = 1; i < matches.GetSize(); i++) {
- std::string match = matches.GetStringAtIndex(i);
- std::string description = descriptions.GetStringAtIndex(i);
+ for (uint32_t i = 1; i < matches.GetSize(); i++) {
+ const StringRef match = matches.GetStringAtIndex(i);
+ const StringRef description = descriptions.GetStringAtIndex(i);
- CompletionItem item;
StringRef match_ref = match;
- for (StringRef commit_point : {".", "->"}) {
- if (match_ref.contains(commit_point)) {
- match_ref = match_ref.rsplit(commit_point).second;
+ for (const StringRef commit_point : {".", "->"}) {
+ if (const size_t pos = match_ref.rfind(commit_point);
+ pos != StringRef::npos) {
+ match_ref = match_ref.substr(pos + commit_point.size());
}
}
- item.text = match_ref;
- if (description.empty())
- item.label = match;
- else
- item.label = match + " -- " + description;
+ CompletionItem item;
+ item.text = match_ref;
+ item.label = match;
+ if (!description.empty())
+ item.detail = description;
targets.emplace_back(std::move(item));
}
}
- return CompletionsResponseBody{std::move(targets)};
+ return response;
}
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp
index 8a651249aee02..e7407e9da9efb 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.cpp
+++ b/lldb/tools/lldb-dap/LLDBUtils.cpp
@@ -19,6 +19,7 @@
#include "lldb/lldb-defines.h"
#include "lldb/lldb-enumerations.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
@@ -277,4 +278,38 @@ lldb::SBLineEntry GetLineEntryForAddress(lldb::SBTarget &target,
return sc.GetLineEntry();
}
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+ uint32_t utf16_codeunits) {
+ size_t bytes_count = 0;
+ size_t utf16_seen_cu = 0;
+ size_t idx = 0;
+ const size_t line_size = line.size();
+
+ while (idx < line_size && utf16_seen_cu < utf16_codeunits) {
+ const char first_char = line[idx];
+ const auto num_bytes = llvm::getNumBytesForUTF8(first_char);
+
+ if (num_bytes == 4) {
+ utf16_seen_cu += 2;
+ } else if (num_bytes < 4) {
+ utf16_seen_cu += 1;
+ } else {
+ // getNumBytesForUTF8 may return bytes greater than 4 this is not valid
+ // UTF8
+ return std::nullopt;
+ }
+
+ idx += num_bytes;
+ if (utf16_seen_cu <= utf16_codeunits) {
+ bytes_count = idx;
+ } else {
+ // We are in the middle of a codepoint or the utf16_codeunits ends in the
+ // middle of a codepoint.
+ return std::nullopt;
+ }
+ }
+
+ return bytes_count;
+}
+
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h
index 9545654504e8d..19174ba59654d 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.h
+++ b/lldb/tools/lldb-dap/LLDBUtils.h
@@ -248,6 +248,24 @@ llvm::Error ToError(const lldb::SBError &error, bool show_user = true);
/// Provides the string value if this data structure is a string type.
std::string GetStringValue(const lldb::SBStructuredData &data);
+/// Converts UTF16 column codeunits to bytes.
+/// we are recieving utf8 from the specification.
+/// UTF16 codunit size => 2 bytes.
+/// UTF8 codunit size => 1 byte.
+/// Example
+/// | info | info | utf16_cu | size in bytes |
+/// | fake f | ƒ | 1 | 2 |
+/// | fake c | ç | 1 | 3 |
+/// | poop char| 💩 | 2 | 4 |
+///
+/// so with inputs string of
+/// (`ƒ💩`, 3) we have 3 utf16_u and ( 2 + 4 ) bytes.
+/// (`ƒ💩`, 2) we have 3 utf16_u and ( 2 + 4 ) bytes but the position is in
+/// between the 💩 char so we return null since the codepoint is not complete.
+///
+/// see https://utf8everywhere.org/#characters for more info.
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+ uint32_t utf16_codeunits);
} // namespace lldb_dap
#endif
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 9d5bdf1efe94c..f7b7bf2e4dda4 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -313,7 +313,7 @@ bool fromJSON(const llvm::json::Value &, LaunchRequestArguments &,
/// field is required.
using LaunchResponse = VoidResponse;
-#define LLDB_DAP_INVALID_PORT -1
+#define LLDB_DAP_INVALID_PORT (-1)
/// An invalid 'frameId' default value.
#define LLDB_DAP_INVALID_FRAME_ID UINT64_MAX
@@ -407,11 +407,11 @@ struct CompletionsArguments {
/// The position within `text` for which to determine the completion
/// proposals. It is measured in UTF-16 code units and the client capability
/// `columnsStartAt1` determines whether it is 0- or 1-based.
- int64_t column = 0;
+ uint32_t column = LLDB_INVALID_COLUMN_NUMBER;
/// A line for which to determine the completion proposals. If missing the
/// first line of the text is assumed.
- int64_t line = 0;
+ uint32_t line = 1;
};
bool fromJSON(const llvm::json::Value &, CompletionsArguments &,
llvm::json::Path);
diff --git a/lldb/unittests/DAP/LLDBUtilsTest.cpp b/lldb/unittests/DAP/LLDBUtilsTest.cpp
index 4f619af2b136e..ade452baf12ba 100644
--- a/lldb/unittests/DAP/LLDBUtilsTest.cpp
+++ b/lldb/unittests/DAP/LLDBUtilsTest.cpp
@@ -9,6 +9,7 @@
#include "LLDBUtils.h"
#include "lldb/API/SBError.h"
#include "lldb/API/SBStructuredData.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Error.h"
#include "gtest/gtest.h"
@@ -63,3 +64,30 @@ TEST(LLDBUtilsTest, ToError) {
std::string error_message = toString(std::move(llvm_error));
EXPECT_EQ(error_message, "Test error message");
}
+
+TEST(LLDBUtilsTest, UTF16Codeunits) {
+ using Expect = std::optional<size_t>;
+
+ EXPECT_EQ(UTF16CodeunitToBytes("a", 0), Expect{0});
+ EXPECT_EQ(UTF16CodeunitToBytes("some word", 4), Expect{4});
+ EX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/176796
More information about the llvm-branch-commits
mailing list