[llvm] [Dexter] Add DAP instruction and function breakpoint handling (PR #152718)

Orlando Cazalet-Hyams via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 27 03:49:05 PDT 2025


https://github.com/OCHyams updated https://github.com/llvm/llvm-project/pull/152718

>From 7089cfd2061515732a41ca05b7d44ed8cf3dec36 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Thu, 7 Aug 2025 14:38:25 +0100
Subject: [PATCH 1/5] [Dexter] Add DAP instruction and function breakpoint
 handling

---
 .../dexter/dex/debugger/DAP.py                | 151 ++++++++++++++----
 .../dexter/dex/debugger/DebuggerBase.py       |  16 ++
 .../dexter/dex/debugger/lldb/LLDB.py          |   7 +
 3 files changed, 145 insertions(+), 29 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
index 3921e7f407f06..e8b32524463ba 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
@@ -185,6 +185,10 @@ def __init__(self, context, *args):
         self.file_to_bp = defaultdict(list)
         # { dex_breakpoint_id -> (file, line, condition) }
         self.bp_info = {}
+        # { dex_breakpoint_id -> function_name }
+        self.function_bp_info = {}
+        # { dex_breakpoint_id -> instruction_reference }
+        self.instruction_bp_info = {}
         # We don't rely on IDs returned directly from the debug adapter. Instead, we use dexter breakpoint IDs, and
         # maintain a two-way-mapping of dex_bp_id<->dap_bp_id. This also allows us to defer the setting of breakpoints
         # in the debug adapter itself until necessary.
@@ -193,6 +197,8 @@ def __init__(self, context, *args):
         self.dex_id_to_dap_id = {}
         self.dap_id_to_dex_ids = {}
         self.pending_breakpoints: bool = False
+        self.pending_function_breakpoints: bool = False
+        self.pending_instruction_breakpoints: bool = False
         # List of breakpoints, indexed by BP ID
         # Each entry has the source file (for use in referencing desired_bps), and the DA-assigned
         # ID for that breakpoint if it has one (if it has been removed or not yet created then it will be None).
@@ -255,6 +261,26 @@ def make_set_breakpoint_request(source: str, bps) -> dict:
             {"source": {"path": source}, "breakpoints": [bp.toDict() for bp in bps]},
         )
 
+    @staticmethod
+    def make_set_function_breakpoint_request(function_names: list[str]) -> dict:
+        # Function breakpoints may specify conditions and hit counts, though we
+        # don't use those here (though perhaps we should use native hit count,
+        # rather than emulating it ConditionalController, now that we have a
+        # shared interface (DAP)).
+        return DAP.make_request(
+            "setFunctionBreakpoints",
+            {"breakpoints": [{"name": f} for f in function_names]},
+        )
+
+    @staticmethod
+    def make_set_instruction_breakpoint_request(addrs: list[str]) -> dict:
+        # Instruction breakpoints have additional fields we're ignoring for the
+        # moment.
+        return DAP.make_request(
+            "setInstructionBreakpoints",
+            {"breakpoints": [{"instructionReference": a} for a in addrs]},
+        )
+
     ############################################################################
     ## DAP communication & state-handling functions
 
@@ -524,6 +550,22 @@ def clear_breakpoints(self):
     def _add_breakpoint(self, file, line):
         return self._add_conditional_breakpoint(file, line, None)
 
+    def add_function_breakpoint(self, name: str):
+        if not self._debugger_state.capabilities.supportsFunctionBreakpoints:
+            raise DebuggerException("Debugger does not support function breakpoints")
+        new_id = self.get_next_bp_id()
+        self.function_bp_info[new_id] = name
+        self.pending_function_breakpoints = True
+        return new_id
+
+    def add_instruction_breakpoint(self, addr: str):
+        if not self._debugger_state.capabilities.supportsInstructionBreakpoints:
+            raise DebuggerException("Debugger does not support instruction breakpoints")
+        new_id = self.get_next_bp_id()
+        self.instruction_bp_info[new_id] = addr
+        self.pending_instruction_breakpoints = True
+        return new_id
+
     def _add_conditional_breakpoint(self, file, line, condition):
         new_id = self.get_next_bp_id()
         self.file_to_bp[file].append(new_id)
@@ -531,38 +573,75 @@ def _add_conditional_breakpoint(self, file, line, condition):
         self.pending_breakpoints = True
         return new_id
 
+    def _update_breakpoint_ids_after_request(
+        self, dex_bp_ids: list[int], response: dict
+    ):
+        dap_bp_ids = [bp["id"] for bp in response["body"]["breakpoints"]]
+        if len(dex_bp_ids) != len(dap_bp_ids):
+            self.context.logger.error(
+                f"Sent request to set {len(dex_bp_ids)} breakpoints, but received {len(dap_bp_ids)} in response."
+            )
+        visited_dap_ids = set()
+        for i, dex_bp_id in enumerate(dex_bp_ids):
+            dap_bp_id = dap_bp_ids[i]
+            self.dex_id_to_dap_id[dex_bp_id] = dap_bp_id
+            # We take the mappings in the response as the canonical mapping, meaning that if the debug server has
+            # simply *changed* the DAP ID for a breakpoint we overwrite the existing mapping rather than adding to
+            # it, but if we receive the same DAP ID for multiple Dex IDs *then* we store a one-to-many mapping.
+            if dap_bp_id in visited_dap_ids:
+                self.dap_id_to_dex_ids[dap_bp_id].append(dex_bp_id)
+            else:
+                self.dap_id_to_dex_ids[dap_bp_id] = [dex_bp_id]
+                visited_dap_ids.add(dap_bp_id)
+
     def _flush_breakpoints(self):
-        if not self.pending_breakpoints:
-            return
-        for file in self.file_to_bp.keys():
-            desired_bps = self._get_desired_bps(file)
+        # Normal and conditional breakpoints.
+        if self.pending_breakpoints:
+            self.pending_breakpoints = False
+            for file in self.file_to_bp.keys():
+                desired_bps = self._get_desired_bps(file)
+                request_id = self.send_message(
+                    self.make_set_breakpoint_request(file, desired_bps)
+                )
+                result = self._await_response(request_id, 10)
+                if not result["success"]:
+                    raise DebuggerException(f"could not set breakpoints for '{file}'")
+                # The debug adapter may have chosen to merge our breakpoints. From here we need to identify such cases and
+                # handle them so that our internal bookkeeping is correct.
+                dex_bp_ids = self.get_current_bps(file)
+                self._update_breakpoint_ids_after_request(dex_bp_ids, result)
+
+        # Funciton breakpoints.
+        if self.pending_function_breakpoints:
+            self.pending_function_breakpoints = False
+            desired_bps = list(self.function_bp_info.values())
             request_id = self.send_message(
-                self.make_set_breakpoint_request(file, desired_bps)
+                self.make_set_function_breakpoint_request(desired_bps)
             )
             result = self._await_response(request_id, 10)
             if not result["success"]:
-                raise DebuggerException(f"could not set breakpoints for '{file}'")
-            # The debug adapter may have chosen to merge our breakpoints. From here we need to identify such cases and
-            # handle them so that our internal bookkeeping is correct.
-            dex_bp_ids = self.get_current_bps(file)
-            dap_bp_ids = [bp["id"] for bp in result["body"]["breakpoints"]]
-            if len(dex_bp_ids) != len(dap_bp_ids):
-                self.context.logger.error(
-                    f"Sent request to set {len(dex_bp_ids)} breakpoints, but received {len(dap_bp_ids)} in response."
+                raise DebuggerException(
+                    f"could not set function breakpoints: '{desired_bps}'"
                 )
-            visited_dap_ids = set()
-            for i, dex_bp_id in enumerate(dex_bp_ids):
-                dap_bp_id = dap_bp_ids[i]
-                self.dex_id_to_dap_id[dex_bp_id] = dap_bp_id
-                # We take the mappings in the response as the canonical mapping, meaning that if the debug server has
-                # simply *changed* the DAP ID for a breakpoint we overwrite the existing mapping rather than adding to
-                # it, but if we receive the same DAP ID for multiple Dex IDs *then* we store a one-to-many mapping.
-                if dap_bp_id in visited_dap_ids:
-                    self.dap_id_to_dex_ids[dap_bp_id].append(dex_bp_id)
-                else:
-                    self.dap_id_to_dex_ids[dap_bp_id] = [dex_bp_id]
-                    visited_dap_ids.add(dap_bp_id)
-        self.pending_breakpoints = False
+            # Is this right? Are we guarenteed the order of the outgoing/incoming lists?
+            dex_bp_ids = list(self.function_bp_info.keys())
+            self._update_breakpoint_ids_after_request(dex_bp_ids, result)
+
+        # Address / instruction breakpoints.
+        if self.pending_instruction_breakpoints:
+            self.pending_instruction_breakpoints = False
+            desired_bps = list(self.instruction_bp_info.values())
+            request_id = self.send_message(
+                self.make_set_instruction_breakpoint_request(desired_bps)
+            )
+            result = self._await_response(request_id, 10)
+            if not result["success"]:
+                raise DebuggerException(
+                    f"could not set instruction breakpoints: '{desired_bps}'"
+                )
+            # Is this right? Are we guarenteed the order of the outgoing/incoming lists?
+            dex_bp_ids = list(self.instruction_bp_info.keys())
+            self._update_breakpoint_ids_after_request(dex_bp_ids, result)
 
     def _confirm_triggered_breakpoint_ids(self, dex_bp_ids):
         """Can be overridden for any specific implementations that need further processing from the debug server's
@@ -587,8 +666,16 @@ def get_triggered_breakpoint_ids(self):
     def delete_breakpoints(self, ids):
         per_file_deletions = defaultdict(list)
         for dex_bp_id in ids:
-            source, _, _ = self.bp_info[dex_bp_id]
-            per_file_deletions[source].append(dex_bp_id)
+            if dex_bp_id in self.bp_info:
+                source, _, _ = self.bp_info[dex_bp_id]
+                per_file_deletions[source].append(dex_bp_id)
+            elif dex_bp_id in self.function_bp_info:
+                del self.function_bp_info[dex_bp_id]
+                self.pending_function_breakpoints = True
+            elif dex_bp_id in self.instruction_bp_info:
+                del self.instruction_bp_info[dex_bp_id]
+                self.pending_instruction_breakpoints = True
+
         for file, deleted_ids in per_file_deletions.items():
             old_len = len(self.file_to_bp[file])
             self.file_to_bp[file] = [
@@ -606,7 +693,13 @@ def _get_launch_params(self, cmdline):
         """ "Set the debugger-specific params used in a launch request."""
 
     def launch(self, cmdline):
-        assert len(self.file_to_bp.keys()) > 0
+        # FIXME: This should probably not a warning, not an assert.
+        assert (
+            len(self.file_to_bp)
+            + len(self.function_bp_info)
+            + len(self.instruction_bp_info)
+            > 0
+        ), "Expected at least one breakpoint before launching"
 
         if self.context.options.target_run_args:
             cmdline += shlex.split(self.context.options.target_run_args)
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py
index bf7552bd5fe3a..dd5cc5d3f0230 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py
@@ -166,6 +166,22 @@ def _add_conditional_breakpoint(self, file_, line, condition):
         """Returns a unique opaque breakpoint id."""
         pass
 
+    def add_function_breakpoint(self, name):
+        """Returns a unique opaque breakpoint id.
+
+        The ID type depends on the debugger being used, but will probably be
+        an int.
+        """
+        raise NotImplementedError()
+
+    def add_instruction_breakpoint(self, addr):
+        """Returns a unique opaque breakpoint id.
+
+        The ID type depends on the debugger being used, but will probably be
+        an int.
+        """
+        raise NotImplementedError()
+
     @abc.abstractmethod
     def delete_breakpoints(self, ids):
         """Delete a set of breakpoints by ids.
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
index 4263fdb5aa019..f40415e77ec47 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
@@ -528,6 +528,13 @@ def _confirm_triggered_breakpoint_ids(self, dex_bp_ids):
         manually check conditions here."""
         confirmed_breakpoint_ids = set()
         for dex_bp_id in dex_bp_ids:
+            # Function and instruction breakpoints don't use conditions.
+            # FIXME: That's not a DAP restruction, so they could in future.
+            if dex_bp_id not in self.bp_info:
+                assert dex_bp_id in self.function_bp_info or dex_bp_id in self.instruction_bp_info
+                confirmed_breakpoint_ids.add(dex_bp_id)
+                continue
+
             _, _, cond = self.bp_info[dex_bp_id]
             if cond is None:
                 confirmed_breakpoint_ids.add(dex_bp_id)

>From 85fe6e3ced8c1484b9fcafb9ea8de7e6839d5f76 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Tue, 12 Aug 2025 12:09:58 +0100
Subject: [PATCH 2/5] rm type subscripts

---
 .../debuginfo-tests/dexter/dex/debugger/DAP.py              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
index e8b32524463ba..4e5fdb9f1a535 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
@@ -262,7 +262,7 @@ def make_set_breakpoint_request(source: str, bps) -> dict:
         )
 
     @staticmethod
-    def make_set_function_breakpoint_request(function_names: list[str]) -> dict:
+    def make_set_function_breakpoint_request(function_names: list) -> dict:
         # Function breakpoints may specify conditions and hit counts, though we
         # don't use those here (though perhaps we should use native hit count,
         # rather than emulating it ConditionalController, now that we have a
@@ -273,7 +273,7 @@ def make_set_function_breakpoint_request(function_names: list[str]) -> dict:
         )
 
     @staticmethod
-    def make_set_instruction_breakpoint_request(addrs: list[str]) -> dict:
+    def make_set_instruction_breakpoint_request(addrs: list) -> dict:
         # Instruction breakpoints have additional fields we're ignoring for the
         # moment.
         return DAP.make_request(
@@ -574,7 +574,7 @@ def _add_conditional_breakpoint(self, file, line, condition):
         return new_id
 
     def _update_breakpoint_ids_after_request(
-        self, dex_bp_ids: list[int], response: dict
+        self, dex_bp_ids: list, response: dict
     ):
         dap_bp_ids = [bp["id"] for bp in response["body"]["breakpoints"]]
         if len(dex_bp_ids) != len(dap_bp_ids):

>From 7930a18c13084da482d4ef915ef060ac0e24ff5d Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Tue, 12 Aug 2025 12:11:20 +0100
Subject: [PATCH 3/5] fmt

---
 .../debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py         | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
index f40415e77ec47..bdb3095bff913 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
@@ -531,7 +531,10 @@ def _confirm_triggered_breakpoint_ids(self, dex_bp_ids):
             # Function and instruction breakpoints don't use conditions.
             # FIXME: That's not a DAP restruction, so they could in future.
             if dex_bp_id not in self.bp_info:
-                assert dex_bp_id in self.function_bp_info or dex_bp_id in self.instruction_bp_info
+                assert (
+                    dex_bp_id in self.function_bp_info
+                    or dex_bp_id in self.instruction_bp_info
+                )
                 confirmed_breakpoint_ids.add(dex_bp_id)
                 continue
 

>From a258cae2803400556c074d79a141428fc135d0b7 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Tue, 12 Aug 2025 17:31:04 +0100
Subject: [PATCH 4/5] update comment

---
 .../debuginfo-tests/dexter/dex/debugger/DAP.py                | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
index 4e5fdb9f1a535..7b1e3edee7298 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
@@ -623,7 +623,7 @@ def _flush_breakpoints(self):
                 raise DebuggerException(
                     f"could not set function breakpoints: '{desired_bps}'"
                 )
-            # Is this right? Are we guarenteed the order of the outgoing/incoming lists?
+            # We expect the breakpoint order to match in request and response.
             dex_bp_ids = list(self.function_bp_info.keys())
             self._update_breakpoint_ids_after_request(dex_bp_ids, result)
 
@@ -639,7 +639,7 @@ def _flush_breakpoints(self):
                 raise DebuggerException(
                     f"could not set instruction breakpoints: '{desired_bps}'"
                 )
-            # Is this right? Are we guarenteed the order of the outgoing/incoming lists?
+            # We expect the breakpoint order to match in request and response.
             dex_bp_ids = list(self.instruction_bp_info.keys())
             self._update_breakpoint_ids_after_request(dex_bp_ids, result)
 

>From 423ec7727a0407beaf4337423ef26b110f21dd2c Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Wed, 27 Aug 2025 11:48:45 +0100
Subject: [PATCH 5/5] nits

---
 .../debuginfo-tests/dexter/dex/debugger/DAP.py                | 4 ++--
 .../debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
index 7b1e3edee7298..cff6d64a1b9d6 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py
@@ -611,7 +611,7 @@ def _flush_breakpoints(self):
                 dex_bp_ids = self.get_current_bps(file)
                 self._update_breakpoint_ids_after_request(dex_bp_ids, result)
 
-        # Funciton breakpoints.
+        # Function breakpoints.
         if self.pending_function_breakpoints:
             self.pending_function_breakpoints = False
             desired_bps = list(self.function_bp_info.values())
@@ -693,7 +693,7 @@ def _get_launch_params(self, cmdline):
         """ "Set the debugger-specific params used in a launch request."""
 
     def launch(self, cmdline):
-        # FIXME: This should probably not a warning, not an assert.
+        # FIXME: Should this be a warning or exception, rather than assert?
         assert (
             len(self.file_to_bp)
             + len(self.function_bp_info)
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
index bdb3095bff913..dec12b9bc83da 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
@@ -529,7 +529,7 @@ def _confirm_triggered_breakpoint_ids(self, dex_bp_ids):
         confirmed_breakpoint_ids = set()
         for dex_bp_id in dex_bp_ids:
             # Function and instruction breakpoints don't use conditions.
-            # FIXME: That's not a DAP restruction, so they could in future.
+            # FIXME: That's not a DAP restriction, so they could in future.
             if dex_bp_id not in self.bp_info:
                 assert (
                     dex_bp_id in self.function_bp_info



More information about the llvm-commits mailing list