[llvm-branch-commits] [llvm] [Dexter] Add !address node (PR #202801)

Mon Jun 15 04:39:34 PDT 2026

https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/202801

>From f3cd2c3858f051e38fadfa42c361c99dbe5a5ef7 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 9 Jun 2026 20:31:00 +0100
Subject: [PATCH 1/2] [Dexter] Add !address node

Adds a node type for Dexter that allows pointer values to be checked against
abstract labels instead of concrete addresses. Each address node has a label
and an optional offset. The first time during evaluation that a given address
label is matched against a valid pointer value, the label is resolved to the
seen address minus the node's offset. From that point on, the resolved value
(plus each node's offset) is used for the remainder of the test evaluation.
---
 .../dexter/dex/evaluation/ExpectMatch.py      | 129 ++++++++++++++----
 .../dexter/dex/evaluation/RunMatch.py         |  21 ++-
 .../dexter/dex/test_script/Nodes.py           |  46 +++++++
 .../scripts/evaluation/eval_address.cpp       |  64 +++++++++
 .../scripts/parser/invalid-address.test       |  26 ++++
 .../scripts/parser/parse-address.test         |  15 ++
 6 files changed, 266 insertions(+), 35 deletions(-)
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
index c9047c6f7d80c..8a001539891a3 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
@@ -7,11 +7,12 @@
 """Utilities for matching debugger output to script expected values."""
 
 from collections import Counter, OrderedDict
+import copy
 from enum import Enum, IntEnum
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 from dex.dextIR import ValueIR
-from dex.test_script.Nodes import Expect, Value
+from dex.test_script.Nodes import Expect, Value, Address
 
 
 def get_expected_value_set(
@@ -53,11 +54,47 @@ def get_expected_value_set(
     return result
 
 
+class ExpectMatchContext:
+    """Context class used to track evaluation state across variables/steps. Updated as new matches are made; since we
+    try many matches and select the best one, we avoid committing any updates to this context until we have selected
+    the final match."""
+
+    def __init__(self):
+        self.address_label_resolutions: Dict[str, int] = {}
+
+    def commit(self, other: "ExpectMatchContext"):
+        assert all(
+            other.address_label_resolutions.get(addr)
+            == self.address_label_resolutions[addr]
+            for addr in self.address_label_resolutions
+        ), "New committed address resolutions override existing resolutions!"
+        self.address_label_resolutions = other.address_label_resolutions
+
+
 class MatchResult(IntEnum):
     FALSE = 0
     PARTIAL = 1
     TRUE = 2
 
+    @staticmethod
+    def from_bools(is_true: bool, is_false: Optional[bool] = None) -> "MatchResult":
+        """Returns a MatchResult based on the provided boolean value(s):
+        - The single argument case simply returns TRUE if the argument is True, and FALSE otherwise.
+        - The two argument case combines its arguments, giving TRUE if `is_true and not is_false`, FALSE if
+          `is_false and not is_true`, and PARTIAL if `is_true and is_false`. Currently rejects
+          `not is_true and not is_false`, as we don't intend to represent this state with a MatchResult.
+        """
+        if is_false is None:
+            is_false = not is_true
+        if is_true and not is_false:
+            return MatchResult.TRUE
+        if is_false and not is_true:
+            return MatchResult.FALSE
+        assert (
+            is_false and is_true
+        ), "Invalid inputs to MatchResult; cannot be not false and not true."
+        return MatchResult.PARTIAL
+
 
 class DebuggerExpectMatch:
     """Class that represents the match between a particular expected value for an Expect node and the actual debugger
@@ -65,12 +102,21 @@ class DebuggerExpectMatch:
     `actual_result` is None if `actual` or `expect.get_variable_result(actual)` is None,
     Otherwise, if `expected` is a dict, then `actual_result` is a dict[str, DebuggerExpectMatch],
     Otherwise, `actual_result` is a str.
+    Uses the provided match_context, and updates a local copy of it; if this match is selected, then its local updated
+    match_context should be committed.
     """
 
-    def __init__(self, expect: Expect, expected, actual: Optional[ValueIR]):
+    def __init__(
+        self,
+        expect: Expect,
+        expected,
+        actual: Optional[ValueIR],
+        match_context: ExpectMatchContext,
+    ):
         self.expect = expect
         self.expected = expected
         self.actual = actual
+        self.match_context = copy.deepcopy(match_context)
         self.actual_result, self.match_result = self._get_actual_result()
         self.match_distance = self._get_match_distance()
 
@@ -93,20 +139,18 @@ def _get_actual_result(
                     )
                 )
                 sub_expect_results[sub_expect] = DebuggerExpectMatch(
-                    self.expect, sub_expected, value
+                    self.expect, sub_expected, value, self.match_context
                 )
-            if all(
-                result.match_result == MatchResult.TRUE
-                for result in sub_expect_results.values()
-            ):
-                match_result = MatchResult.TRUE
-            elif all(
-                result.match_result == MatchResult.FALSE
-                for result in sub_expect_results.values()
-            ):
-                match_result = MatchResult.FALSE
-            else:
-                match_result = MatchResult.PARTIAL
+            match_result = MatchResult.from_bools(
+                any(
+                    result.match_result == MatchResult.TRUE
+                    for result in sub_expect_results.values()
+                ),
+                any(
+                    result.match_result == MatchResult.FALSE
+                    for result in sub_expect_results.values()
+                ),
+            )
             return sub_expect_results, match_result
 
         actual_result = (
@@ -114,11 +158,32 @@ def _get_actual_result(
             if self.actual is not None
             else None
         )
-        match_result = (
-            MatchResult.TRUE
-            if (self.expected is not None and str(self.expected) == actual_result)
-            else MatchResult.FALSE
-        )
+        if self.expected is None or actual_result is None:
+            return actual_result, MatchResult.FALSE
+        if isinstance(self.expected, Address):
+            # First check whether the actual value we have is an address.
+            try:
+                actual_addr = int(actual_result.split(maxsplit=1)[0], 16)
+            except ValueError:
+                # Not a valid address, so we can't match.
+                return actual_result, MatchResult.FALSE
+            # If the address is already resolved, we just have to see if it matches.
+            if (
+                resolved_addr := self.match_context.address_label_resolutions.get(
+                    self.expected.name
+                )
+            ) is not None:
+                return actual_result, MatchResult.from_bools(
+                    resolved_addr + self.expected.offset == actual_addr
+                )
+            # If the address is not resolved, then we can assign to it now in our local copy.
+            resolved_addr = actual_addr - self.expected.offset
+            self.match_context.address_label_resolutions[
+                self.expected.name
+            ] = resolved_addr
+            return actual_result, MatchResult.TRUE
+
+        match_result = MatchResult.from_bools(str(self.expected) == actual_result)
         return actual_result, match_result
 
     def _get_match_distance(self) -> float:
@@ -184,20 +249,26 @@ def colorize(input: str, match_result: MatchResult) -> str:
         return f"{{ {', '.join(sub_values)} }}"
 
 
-def get_expect_match(expect: Expect, expected_values, actual: ValueIR):
+def get_expect_match(
+    expect: Expect, expected_values, actual: ValueIR, match_context: ExpectMatchContext
+):
     """Given one or more expected values for an Expect node and an actual ValueIR, returns a match for the first
     matching expected values, or for None if there are no matching expected values."""
     if not isinstance(expected_values, list):
         expected_values = [expected_values]
-    best_partial_match = DebuggerExpectMatch(expect, None, actual)
-    best_partial_match_dist = 1.0
+    best_match = DebuggerExpectMatch(expect, None, actual, match_context)
+    best_match_dist = 1.0
     for expected_value in expected_values:
-        expect_match = DebuggerExpectMatch(expect, expected_value, actual)
+        expect_match = DebuggerExpectMatch(
+            expect, expected_value, actual, match_context
+        )
         if expect_match.match_result == MatchResult.TRUE:
-            return expect_match
+            best_match = expect_match
+            break
         # A "FALSE" match  will have a match distance of 1.0, and therefore will never be considered a "best match".
-        if expect_match.match_distance < best_partial_match_dist:
-            best_partial_match = expect_match
-            best_partial_match_dist = expect_match.match_distance
+        if expect_match.match_distance < best_match_dist:
+            best_match = expect_match
+            best_match_dist = expect_match.match_distance
 
-    return best_partial_match
+    match_context.commit(best_match.match_context)
+    return best_match
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
index a7112ce4c5b30..73b12a918bd5f 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
@@ -16,6 +16,7 @@
 from dex.dextIR import DextIR, StepIR
 from dex.evaluation.ExpectMatch import (
     DebuggerExpectMatch,
+    ExpectMatchContext,
     MatchResult,
     get_expect_match,
 )
@@ -28,15 +29,17 @@
 from dex.test_script import DexterScript, Scope
 from dex.test_script.Nodes import Expect, Value
 
-
 class DebuggerStepMatch:
     """Class used to record the match between a DexterScript and a StepIR, including the state match, determining which
     script nodes are "active", and the expect matches, which compare the debugger's output to the DexterScript's
     expected output."""
 
-    def __init__(self, step: StepIR, script: DexterScript):
+    def __init__(
+        self, step: StepIR, script: DexterScript, match_context: ExpectMatchContext
+    ):
         self.step = step
         self.script = script
+        self.match_context = match_context
         self.state_match = get_active_where_matches(script, step)
         expects_to_match = {
             expect
@@ -49,7 +52,10 @@ def add_expected_values(expect: Expect, expected_value: Any, scope: Scope):
             assert isinstance(expect, Value), "Non-Value expects currently unsupported"
             if expect in expects_to_match:
                 self.expect_matches[expect] = get_expect_match(
-                    expect, expected_value, step.watches[expect.get_watched_expr()]
+                    expect,
+                    expected_value,
+                    step.watches[expect.get_watched_expr()],
+                    self.match_context,
                 )
 
         script.visit_script(visit_expect=add_expected_values)
@@ -62,8 +68,9 @@ class DebuggerRunMatch(object):
     affect the match of another variable at step N+1, thus we go one step at a time.
     """
 
-    def __init__(self, context, dext_ir: DextIR):
-        self.context = context
+    def __init__(self, dex_context, dext_ir: DextIR):
+        self.dex_context = dex_context
+        self.match_context = ExpectMatchContext()
         self.dext_ir = dext_ir
         self.metrics: Dict[str, Metric] = {}
         self.step_matches: List[DebuggerStepMatch] = []
@@ -86,7 +93,9 @@ def add_expected_values(expect: Expect, expected_value: Any, scope: Scope):
 
         # Then produce all of our step matches.
         for step in self.dext_ir.steps:
-            self.step_matches.append(DebuggerStepMatch(step, script))
+            self.step_matches.append(
+                DebuggerStepMatch(step, script, self.match_context)
+            )
 
         # Then, for each expect, produce the list of results for just that variable.
         for step_match in self.step_matches:
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
index a690fcd98ec1b..6d9f4f8ef5080 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
@@ -24,6 +24,7 @@ def setup_yaml_parser(loader):
         DexRange,
         Label,
         Then,
+        Address,
     ]
     for c in reg_classes:
         c.register_yaml(loader)
@@ -236,6 +237,51 @@ def register_yaml(loader):
 ## Utility Nodes: Can be used anywhere in a script as a form of syntactic sugar.
 
 
+class Address:
+    """Named label for an address, which may resolve to different values with each test run, but will resolve
+    consistently within a test run."""
+
+    def __init__(self, name: str, offset: int):
+        self.name = name
+        self.offset = offset
+        if not re.match(r"^([a-zA-Z_]\w*)$", name):
+            raise DexterNodeError(self, f'Invalid !address identifier "{name}"')
+
+    def __repr__(self):
+        if not self.offset:
+            offset_str = ""
+        elif self.offset > 0:
+            offset_str = f" + {self.offset}"
+        else:
+            offset_str = f" - {-self.offset}"
+        return f"Address({self.name}{offset_str})"
+
+    @staticmethod
+    def constructor(loader, node):
+        address_str = str(loader.construct_scalar(node)).strip()
+        offset = 0
+        if match := re.match(r"^([a-zA-Z_]\w*)\s*([+-])\s*(\d+)$", address_str):
+            identifier, sign, number = match.groups()
+            offset = int(number) if sign == "+" else -int(number)
+            address_str = identifier
+        return Address(address_str, offset)
+
+    @staticmethod
+    def representer(dumper, data: "Address"):
+        if not data.offset:
+            offset_str = ""
+        elif data.offset > 0:
+            offset_str = f"+{data.offset}"
+        else:
+            offset_str = f"-{-data.offset}"
+        return dumper.represent_scalar("!address", data.name + offset_str)
+
+    @staticmethod
+    def register_yaml(loader):
+        yaml.add_constructor("!address", Address.constructor, loader)
+        yaml.add_representer(Address, Address.representer)
+
+
 @dataclass(frozen=True)
 class Line:
     """Union class between an int or a Label, used to represent lines inside of Nodes."""
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
new file mode 100644
index 0000000000000..17744db7a4711
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
@@ -0,0 +1,64 @@
+// RUN: %dexter_regression_test_cxx_build %s -o %t
+// RUN: %dexter_regression_test_run --use-script --binary %t -- %s | FileCheck %s
+
+// Test evaluation of !address nodes in Dexter.
+
+// CHECK:      Non-matching nodes:
+// CHECK-SAME: Value(FalseStart)
+// CHECK:      Non-matching nodes:
+// CHECK-SAME: Value(EvenFalserStart)
+// CHECK-NOT: Non-matching nodes
+
+// CHECK: total_watched_steps: 12
+// CHECK: correct_steps: 10
+// CHECK: incorrect_steps: 2
+// CHECK: missing_var_steps: 0
+// CHECK: unexpected_value_steps: 2
+// CHECK: seen_values: 11
+// CHECK: missing_values: 2
+
+struct SubRange {
+  char *Begin;
+  int Length;
+};
+
+int main() {
+  char Data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+  char *Start = Data;
+  char *FalseStart = Data + 1;
+  char *EvenFalserStart = Data + 2;
+  char *Middle = Data + 5; // !dex_label begin
+  char *NearEnd = Data + 8;
+  char *Pos = Data + 4;
+  for (int I = 0; I < 6; ++I) {
+    Pos = Pos + 1; // !dex_label loop
+  }
+  SubRange Range = {Data + 2, 4};
+  return 0; // !dex_label ret
+}
+
+/*
+---
+# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is evaluated first.
+!where {lines: !label begin}:
+    !value Start: !address data
+    !value FalseStart: !address data
+# `EvenFalserStart` will also be incorrect, because it is evaluated later.
+!where {lines: !label begin + 1}:
+    !value EvenFalserStart: !address data
+!where {lines: !label loop}:
+    !value Pos:
+    - !address data + 4
+    - !address data + 5
+    - !address data + 6
+    - !address data + 7
+    - !address data + 8
+    - !address data + 9
+!where {lines: !label ret}:
+    !value Middle: !address data + 5
+    !value NearEnd: !address end - 2
+    !value Range:
+        Begin: !address data + 2
+        Length: 4
+...
+*/
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
new file mode 100644
index 0000000000000..beae06b2eae0e
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
@@ -0,0 +1,26 @@
+RUN: not %dexter_regression_test_run --binary %s --use-script --skip-run -- %s 2>&1 | FileCheck %s
+
+Tests that we reject ill-formed addresses.
+
+CHECK: No valid Dexter script found in file
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(foo + bar): Invalid !address identifier "foo + bar"
+---
+!where {function: foo}:
+    !value a: !address foo + bar
+...
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(24): Invalid !address identifier "24"
+---
+!where {function: foo}:
+    !value a: !address 24
+...
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(something something): Invalid !address identifier "something something"
+---
+!where {function: foo}:
+    !value a: !address something something
+...
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test
new file mode 100644
index 0000000000000..de9327b607a04
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test
@@ -0,0 +1,15 @@
+RUN: %dexter_regression_test_run --binary %s --use-script --skip-run -- %s 2>&1 | FileCheck %s
+
+Tests that we can correctly parse+print !address nodes.
+
+CHECK:      ? !where {function: foo}
+CHECK-NEXT: : !value 'a': !address 'foo'
+CHECK-NEXT:   !value 'b': !address 'foo+1'
+CHECK-NEXT:   !value 'c': !address '_bar-12'
+
+---
+!where {function: foo}:
+    !value a: !address foo
+    !value b: !address foo + 1
+    !value c: !address _bar -12
+...

>From 2d6964fc160dc8f82e8bcb2a17e095d1369ce1eb Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 10 Jun 2026 13:27:06 +0100
Subject: [PATCH 2/2] format

---
 .../feature_tests/scripts/evaluation/eval_address.cpp     | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
index 17744db7a4711..603497baa1cc5 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
@@ -1,5 +1,6 @@
 // RUN: %dexter_regression_test_cxx_build %s -o %t
-// RUN: %dexter_regression_test_run --use-script --binary %t -- %s | FileCheck %s
+// RUN: %dexter_regression_test_run --use-script --binary %t -- %s \
+// RUN:   | FileCheck %s
 
 // Test evaluation of !address nodes in Dexter.
 
@@ -23,7 +24,7 @@ struct SubRange {
 };
 
 int main() {
-  char Data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+  char Data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
   char *Start = Data;
   char *FalseStart = Data + 1;
   char *EvenFalserStart = Data + 2;
@@ -39,7 +40,8 @@ int main() {
 
 /*
 ---
-# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is evaluated first.
+# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is
+# evaluated first.
 !where {lines: !label begin}:
     !value Start: !address data
     !value FalseStart: !address data