[llvm-branch-commits] [lldb] release/22.x: [lldb-dap] Fix Completions Request crash (#176211) (PR #176796)

Mon Jan 19 10:19:25 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-lldb

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport 3ca7a729901851f4a6f83e9783ee393cca46fd12

Requested by: @da-viper

---

Patch is 21.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176796.diff


9 Files Affected:

- (modified) lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py (+6-1) 
- (modified) lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py (+37-25) 
- (modified) lldb/test/API/tools/lldb-dap/completions/main.cpp (+1) 
- (modified) lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp (+99-46) 
- (modified) lldb/tools/lldb-dap/LLDBUtils.cpp (+35) 
- (modified) lldb/tools/lldb-dap/LLDBUtils.h (+18) 
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+3-3) 
- (modified) lldb/unittests/DAP/LLDBUtilsTest.cpp (+28) 
- (modified) lldb/unittests/DAP/ProtocolTypesTest.cpp (+3-3) 


``````````diff

diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 6a98bb50c9036..a79d766118b9d 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -1393,7 +1393,12 @@ def request_compileUnits(self, moduleId):
         return response
 
     def request_completions(self, text, frameId=None):
-        args_dict = {"text": text, "column": len(text) + 1}
+        def code_units(input: str) -> int:
+            utf16_bytes = input.encode("utf-16-le")
+            # one UTF16 codeunit = 2 bytes.
+            return len(utf16_bytes) // 2
+
+        args_dict = {"text": text, "column": code_units(text) + 1}
         if frameId:
             args_dict["frameId"] = frameId
         command_dict = {
diff --git a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
index 0ebecf6872a7d..1792ff9953efe 100644
--- a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
+++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
@@ -5,31 +5,32 @@
 import lldbdap_testcase
 import dap_server
 from lldbsuite.test import lldbutil
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import skipIf
+from lldbsuite.test.lldbtest import line_number
 
 session_completion = {
     "text": "session",
-    "label": "session -- Commands controlling LLDB session.",
+    "label": "session",
+    "detail": "Commands controlling LLDB session.",
 }
 settings_completion = {
     "text": "settings",
-    "label": "settings -- Commands for managing LLDB settings.",
+    "label": "settings",
+    "detail": "Commands for managing LLDB settings.",
 }
 memory_completion = {
     "text": "memory",
-    "label": "memory -- Commands for operating on memory in the current target process.",
+    "label": "memory",
+    "detail": "Commands for operating on memory in the current target process.",
 }
 command_var_completion = {
     "text": "var",
-    "label": "var -- Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
+    "label": "var",
+    "detail": "Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
 }
-variable_var_completion = {
-    "text": "var",
-    "label": "var -- vector<baz> &",
-}
-variable_var1_completion = {"text": "var1", "label": "var1 -- int &"}
-variable_var2_completion = {"text": "var2", "label": "var2 -- int &"}
+variable_var_completion = {"text": "var", "label": "var", "detail": "vector<baz> &"}
+variable_var1_completion = {"text": "var1", "label": "var1", "detail": "int &"}
+variable_var2_completion = {"text": "var2", "label": "var2", "detail": "int &"}
 
 
 # Older version of libcxx produce slightly different typename strings for
@@ -79,11 +80,13 @@ def test_command_completions(self):
             [
                 {
                     "text": "read",
-                    "label": "read -- Read from the memory of the current target process.",
+                    "label": "read",
+                    "detail": "Read from the memory of the current target process.",
                 },
                 {
                     "text": "region",
-                    "label": "region -- Get information on the memory region containing an address in the current target process.",
+                    "label": "region",
+                    "detail": "Get information on the memory region containing an address in the current target process.",
                 },
             ],
         )
@@ -110,7 +113,8 @@ def test_command_completions(self):
             [
                 {
                     "text": "set",
-                    "label": "set -- Set the value of the specified debugger setting.",
+                    "label": "set",
+                    "detail": "Set the value of the specified debugger setting.",
                 }
             ],
         )
@@ -167,7 +171,7 @@ def test_variable_completions(self):
         self.verify_completions(
             self.dap_server.get_completions("str"),
             [{"text": "struct", "label": "struct"}],
-            [{"text": "str1", "label": "str1 -- std::string &"}],
+            [{"text": "str1", "label": "str1", "detail": "std::string &"}],
         )
 
         self.continue_to_next_stop()
@@ -189,42 +193,46 @@ def test_variable_completions(self):
             self.dap_server.get_completions("str"),
             [
                 {"text": "struct", "label": "struct"},
-                {"text": "str1", "label": "str1 -- std::string &"},
+                {"text": "str1", "label": "str1", "detail": "std::string &"},
             ],
         )
 
+        self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+        # Test utf8 after ascii.
+        self.dap_server.get_completions("mƒ")
+
         # Completion also works for more complex expressions
         self.verify_completions(
             self.dap_server.get_completions("foo1.v"),
-            [{"text": "var1", "label": "foo1.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.my_bar_object.v"),
-            [{"text": "var1", "label": "foo1.my_bar_object.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.my_bar_object.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.var1 + foo1.v"),
-            [{"text": "var1", "label": "foo1.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.var1 + v"),
-            [{"text": "var1", "label": "var1 -- int &"}],
+            [{"text": "var1", "label": "var1", "detail": "int &"}],
         )
 
         # should correctly handle spaces between objects and member operators
         self.verify_completions(
             self.dap_server.get_completions("foo1 .v"),
-            [{"text": "var1", "label": ".var1 -- int"}],
-            [{"text": "var2", "label": ".var2 -- int"}],
+            [{"text": "var1", "label": ".var1", "detail": "int"}],
+            [{"text": "var2", "label": ".var2", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1 . v"),
-            [{"text": "var1", "label": "var1 -- int"}],
-            [{"text": "var2", "label": "var2 -- int"}],
+            [{"text": "var1", "label": "var1", "detail": "int"}],
+            [{"text": "var2", "label": "var2", "detail": "int"}],
         )
 
         # Even in variable mode, we can still use the escape prefix
@@ -273,6 +281,10 @@ def test_auto_completions(self):
             ],
         )
 
+        # TODO: Note we are not checking the result because the `expression --` command adds an extra character
+        # for non ascii variables.
+        self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+
         self.continue_to_exit()
         console_str = self.get_console()
         # we check in console to avoid waiting for output event.
diff --git a/lldb/test/API/tools/lldb-dap/completions/main.cpp b/lldb/test/API/tools/lldb-dap/completions/main.cpp
index 4314067cfe951..a12ce18201d97 100644
--- a/lldb/test/API/tools/lldb-dap/completions/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/completions/main.cpp
@@ -29,6 +29,7 @@ int main(int argc, char const *argv[]) {
   fun(vec);
   bar bar1 = {2};
   bar *bar2 = &bar1;
+  int ƒake_f = 200;
   foo foo1 = {3, &bar1, bar1, NULL};
   return 0; // breakpoint 2
 }
diff --git a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
index de9a15dcb73f4..9a3973190f7e7 100644
--- a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
@@ -7,17 +7,70 @@
 //===----------------------------------------------------------------------===//
 
 #include "DAP.h"
-#include "JSONUtils.h"
+#include "LLDBUtils.h"
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
 #include "RequestHandler.h"
 #include "lldb/API/SBStringList.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace lldb_dap;
+using namespace lldb;
 using namespace lldb_dap::protocol;
 
 namespace lldb_dap {
+/// Gets the position in the UTF8 string where the specified line started.
+static size_t GetLineStartPos(StringRef text, uint32_t line) {
+  if (line == 0) // Invalid line.
+    return StringRef::npos;
+
+  if (line == 1)
+    return 0;
+
+  uint32_t cur_line = 1;
+  size_t pos = 0;
+
+  while (cur_line < line) {
+    const size_t new_line_pos = text.find('\n', pos);
+
+    if (new_line_pos == StringRef::npos)
+      return new_line_pos;
+
+    pos = new_line_pos + 1;
+    // text may end with a new line
+    if (pos >= text.size())
+      return StringRef::npos;
+
+    cur_line++;
+  }
+
+  assert(pos < text.size());
+  return pos;
+}
+
+static std::optional<size_t> GetCursorPos(StringRef text, uint32_t line,
+                                          uint32_t utf16_codeunits) {
+  if (text.empty())
+    return std::nullopt;
+
+  const size_t line_start_pos = GetLineStartPos(text, line);
+  if (line_start_pos == StringRef::npos)
+    return std::nullopt;
+
+  const StringRef completion_line =
+      text.substr(line_start_pos, text.find('\n', line_start_pos));
+  if (completion_line.empty())
+    return std::nullopt;
+
+  const std::optional<size_t> cursor_pos_opt =
+      UTF16CodeunitToBytes(completion_line, utf16_codeunits);
+  if (!cursor_pos_opt)
+    return std::nullopt;
+
+  const size_t cursor_pos = line_start_pos + *cursor_pos_opt;
+  return cursor_pos;
+}
 
 /// Returns a list of possible completions for a given caret position and text.
 ///
@@ -25,85 +78,85 @@ namespace lldb_dap {
 /// `supportsCompletionsRequest` is true.
 Expected<CompletionsResponseBody>
 CompletionsRequestHandler::Run(const CompletionsArguments &args) const {
+  std::string text = args.text;
+  const uint32_t line = args.line;
+  // column starts at 1.
+  const uint32_t utf16_codeunits = args.column - 1;
+
+  const auto cursor_pos_opt = GetCursorPos(text, line, utf16_codeunits);
+  if (!cursor_pos_opt)
+    return CompletionsResponseBody{};
+
+  size_t cursor_pos = *cursor_pos_opt;
+
   // If we have a frame, try to set the context for variable completions.
   lldb::SBFrame frame = dap.GetLLDBFrame(args.frameId);
   if (frame.IsValid()) {
-    frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread());
-    frame.GetThread().SetSelectedFrame(frame.GetFrameID());
-  }
-
-  std::string text = args.text;
-  auto original_column = args.column;
-  auto original_line = args.line;
-  auto offset = original_column - 1;
-  if (original_line > 1) {
-    SmallVector<StringRef, 2> lines;
-    StringRef(text).split(lines, '\n');
-    for (int i = 0; i < original_line - 1; i++) {
-      offset += lines[i].size();
-    }
+    lldb::SBThread frame_thread = frame.GetThread();
+    frame_thread.GetProcess().SetSelectedThread(frame_thread);
+    frame_thread.SetSelectedFrame(frame.GetFrameID());
   }
 
-  std::vector<CompletionItem> targets;
-
-  bool had_escape_prefix =
-      StringRef(text).starts_with(dap.configuration.commandEscapePrefix);
-  ReplMode completion_mode = dap.DetectReplMode(frame, text, true);
-
-  // Handle the offset change introduced by stripping out the
+  const StringRef escape_prefix = dap.configuration.commandEscapePrefix;
+  const bool had_escape_prefix = StringRef(text).starts_with(escape_prefix);
+  const ReplMode repl_mode = dap.DetectReplMode(frame, text, true);
+  // Handle the cursor_pos change introduced by stripping out the
   // `command_escape_prefix`.
   if (had_escape_prefix) {
-    if (offset <
-        static_cast<int64_t>(dap.configuration.commandEscapePrefix.size())) {
-      return CompletionsResponseBody{std::move(targets)};
-    }
-    offset -= dap.configuration.commandEscapePrefix.size();
+    if (cursor_pos < escape_prefix.size())
+      return CompletionsResponseBody{};
+
+    cursor_pos -= escape_prefix.size();
   }
 
   // While the user is typing then we likely have an incomplete input and cannot
   // reliably determine the precise intent (command vs variable), try completing
   // the text as both a command and variable expression, if applicable.
   const std::string expr_prefix = "expression -- ";
-  std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
-      {std::make_tuple(ReplMode::Command, text, offset),
+  const std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
+      {std::make_tuple(ReplMode::Command, text, cursor_pos),
        std::make_tuple(ReplMode::Variable, expr_prefix + text,
-                       offset + expr_prefix.size())}};
+                       cursor_pos + expr_prefix.size())}};
+
+  CompletionsResponseBody response;
+  std::vector<CompletionItem> &targets = response.targets;
+  lldb::SBCommandInterpreter interpreter = dap.debugger.GetCommandInterpreter();
   for (const auto &[mode, line, cursor] : exprs) {
-    if (completion_mode != ReplMode::Auto && completion_mode != mode)
+    if (repl_mode != ReplMode::Auto && repl_mode != mode)
       continue;
 
     lldb::SBStringList matches;
     lldb::SBStringList descriptions;
-    if (!dap.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions(
-            line.c_str(), cursor, 0, 100, matches, descriptions))
+    if (!interpreter.HandleCompletionWithDescriptions(
+            line.c_str(), cursor, 0, 50, matches, descriptions))
       continue;
 
     // The first element is the common substring after the cursor position for
     // all the matches. The rest of the elements are the matches so ignore the
     // first result.
-    for (size_t i = 1; i < matches.GetSize(); i++) {
-      std::string match = matches.GetStringAtIndex(i);
-      std::string description = descriptions.GetStringAtIndex(i);
+    for (uint32_t i = 1; i < matches.GetSize(); i++) {
+      const StringRef match = matches.GetStringAtIndex(i);
+      const StringRef description = descriptions.GetStringAtIndex(i);
 
-      CompletionItem item;
       StringRef match_ref = match;
-      for (StringRef commit_point : {".", "->"}) {
-        if (match_ref.contains(commit_point)) {
-          match_ref = match_ref.rsplit(commit_point).second;
+      for (const StringRef commit_point : {".", "->"}) {
+        if (const size_t pos = match_ref.rfind(commit_point);
+            pos != StringRef::npos) {
+          match_ref = match_ref.substr(pos + commit_point.size());
         }
       }
-      item.text = match_ref;
 
-      if (description.empty())
-        item.label = match;
-      else
-        item.label = match + " -- " + description;
+      CompletionItem item;
+      item.text = match_ref;
+      item.label = match;
+      if (!description.empty())
+        item.detail = description;
 
       targets.emplace_back(std::move(item));
     }
   }
 
-  return CompletionsResponseBody{std::move(targets)};
+  return response;
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp
index 8a651249aee02..e7407e9da9efb 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.cpp
+++ b/lldb/tools/lldb-dap/LLDBUtils.cpp
@@ -19,6 +19,7 @@
 #include "lldb/lldb-defines.h"
 #include "lldb/lldb-enumerations.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"
@@ -277,4 +278,38 @@ lldb::SBLineEntry GetLineEntryForAddress(lldb::SBTarget &target,
   return sc.GetLineEntry();
 }
 
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+                                           uint32_t utf16_codeunits) {
+  size_t bytes_count = 0;
+  size_t utf16_seen_cu = 0;
+  size_t idx = 0;
+  const size_t line_size = line.size();
+
+  while (idx < line_size && utf16_seen_cu < utf16_codeunits) {
+    const char first_char = line[idx];
+    const auto num_bytes = llvm::getNumBytesForUTF8(first_char);
+
+    if (num_bytes == 4) {
+      utf16_seen_cu += 2;
+    } else if (num_bytes < 4) {
+      utf16_seen_cu += 1;
+    } else {
+      // getNumBytesForUTF8 may return bytes greater than 4 this is not valid
+      // UTF8
+      return std::nullopt;
+    }
+
+    idx += num_bytes;
+    if (utf16_seen_cu <= utf16_codeunits) {
+      bytes_count = idx;
+    } else {
+      // We are in the middle of a codepoint or the utf16_codeunits ends in the
+      // middle of a codepoint.
+      return std::nullopt;
+    }
+  }
+
+  return bytes_count;
+}
+
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h
index 9545654504e8d..19174ba59654d 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.h
+++ b/lldb/tools/lldb-dap/LLDBUtils.h
@@ -248,6 +248,24 @@ llvm::Error ToError(const lldb::SBError &error, bool show_user = true);
 /// Provides the string value if this data structure is a string type.
 std::string GetStringValue(const lldb::SBStructuredData &data);
 
+/// Converts UTF16 column codeunits to bytes.
+/// we are recieving utf8 from the specification.
+/// UTF16 codunit size => 2 bytes.
+/// UTF8 codunit size => 1 byte.
+/// Example
+/// | info     | info  | utf16_cu | size in bytes |
+/// | fake f   | ƒ     | 1        | 2             |
+/// | fake c   | ç     | 1        | 3             |
+/// | poop char| 💩    | 2        | 4             |
+///
+/// so with inputs string of
+/// (`ƒ💩`, 3) we have 3 utf16_u and ( 2 + 4 ) bytes.
+/// (`ƒ💩`, 2) we have 3 utf16_u and ( 2 + 4 ) bytes but the position is in
+///  between the 💩 char so we return null since the codepoint is not complete.
+///
+/// see https://utf8everywhere.org/#characters for more info.
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+                                           uint32_t utf16_codeunits);
 } // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 9d5bdf1efe94c..f7b7bf2e4dda4 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -313,7 +313,7 @@ bool fromJSON(const llvm::json::Value &, LaunchRequestArguments &,
 /// field is required.
 using LaunchResponse = VoidResponse;
 
-#define LLDB_DAP_INVALID_PORT -1
+#define LLDB_DAP_INVALID_PORT (-1)
 /// An invalid 'frameId' default value.
 #define LLDB_DAP_INVALID_FRAME_ID UINT64_MAX
 
@@ -407,11 +407,11 @@ struct CompletionsArguments {
   /// The position within `text` for which to determine the completion
   /// proposals. It is measured in UTF-16 code units and the client capability
   /// `columnsStartAt1` determines whether it is 0- or 1-based.
-  int64_t column = 0;
+  uint32_t column = LLDB_INVALID_COLUMN_NUMBER;
 
   /// A line for which to determine the completion proposals. If missing the
   /// first line of the text is assumed.
-  int64_t line = 0;
+  uint32_t line = 1;
 };
 bool fromJSON(const llvm::json::Value &, CompletionsArguments &,
               llvm::json::Path);
diff --git a/lldb/unittests/DAP/LLDBUtilsTest.cpp b/lldb/unittests/DAP/LLDBUtilsTest.cpp
index 4f619af2b136e..ade452baf12ba 100644
--- a/lldb/unittests/DAP/LLDBUtilsTest.cpp
+++ b/lldb/unittests/DAP/LLDBUtilsTest.cpp
@@ -9,6 +9,7 @@
 #include "LLDBUtils.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBStructuredData.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Error.h"
 #include "gtest/gtest.h"
 
@@ -63,3 +64,30 @@ TEST(LLDBUtilsTest, ToError) {
   std::string error_message = toString(std::move(llvm_error));
   EXPECT_EQ(error_message, "Test error message");
 }
+
+TEST(LLDBUtilsTest, UTF16Codeunits) {
+  using Expect = std::optional<size_t>;
+
+  EXPECT_EQ(UTF16CodeunitToBytes("a", 0), Expect{0});
+  EXPECT_EQ(UTF16CodeunitToBytes("some word", 4), Expect{4});
+  EX...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/176796