[llvm-branch-commits] [llvm] [Dexter] Add !address node (PR #202801)
Stephen Tozer via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jun 15 04:39:34 PDT 2026
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/202801
>From f3cd2c3858f051e38fadfa42c361c99dbe5a5ef7 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 9 Jun 2026 20:31:00 +0100
Subject: [PATCH 1/2] [Dexter] Add !address node
Adds a node type for Dexter that allows checking abstract labels instead of
concrete addresses. Each address node has a label and an optional offset.
The first time during evaluation that a given address label is matched
against a valid pointer value, the label is assigned a value that matches
the seen address (adjusting for any offset). From that point on, the
resolved address value is used for the remainder of the test evaluation.
---
.../dexter/dex/evaluation/ExpectMatch.py | 129 ++++++++++++++----
.../dexter/dex/evaluation/RunMatch.py | 21 ++-
.../dexter/dex/test_script/Nodes.py | 46 +++++++
.../scripts/evaluation/eval_address.cpp | 64 +++++++++
.../scripts/parser/invalid-address.test | 26 ++++
.../scripts/parser/parse-address.test | 15 ++
6 files changed, 266 insertions(+), 35 deletions(-)
create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
index c9047c6f7d80c..8a001539891a3 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectMatch.py
@@ -7,11 +7,12 @@
"""Utilities for matching debugger output to script expected values."""
from collections import Counter, OrderedDict
+import copy
from enum import Enum, IntEnum
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from dex.dextIR import ValueIR
-from dex.test_script.Nodes import Expect, Value
+from dex.test_script.Nodes import Expect, Value, Address
def get_expected_value_set(
@@ -53,11 +54,47 @@ def get_expected_value_set(
return result
+class ExpectMatchContext:
+ """Context class used to track evaluation state across variables/steps. Updated as new matches are made; since we
+ try many matches and select the best one, we avoid committing any updates to this context until we have selected
+ the final match."""
+
+ def __init__(self):
+ self.address_label_resolutions: Dict[str, int] = {}
+
+ def commit(self, other: "ExpectMatchContext"):
+ assert all(
+ other.address_label_resolutions.get(addr)
+ == self.address_label_resolutions[addr]
+ for addr in self.address_label_resolutions
+ ), "New committed address resolutions override existing resolutions!"
+ self.address_label_resolutions = other.address_label_resolutions
+
+
class MatchResult(IntEnum):
FALSE = 0
PARTIAL = 1
TRUE = 2
+ @staticmethod
+ def from_bools(is_true: bool, is_false: Optional[bool] = None) -> "MatchResult":
+ """Returns a MatchResult based on the provided boolean value(s):
+ - The single argument case simply returns TRUE if the argument is True, and FALSE otherwise.
+ - The two argument case combines its arguments, giving TRUE if `is_true and not is_false`, FALSE for the
+ inverse, and PARTIAL if `is_true and is_false`. Currently rejects `not is_true and not is_false`, as we don't
+ intend to represent this state with a MatchResult.
+ """
+ if is_false is None:
+ is_false = not is_true
+ if is_true and not is_false:
+ return MatchResult.TRUE
+ if is_false and not is_true:
+ return MatchResult.FALSE
+ assert (
+ is_false and is_true
+ ), "Invalid inputs to MatchResult; cannot be not false and not true."
+ return MatchResult.PARTIAL
+
class DebuggerExpectMatch:
"""Class that represents the match between a particular expected value for an Expect node and the actual debugger
@@ -65,12 +102,21 @@ class DebuggerExpectMatch:
`actual_result` is None if `actual` or `expect.get_variable_result(actual)` is None,
Otherwise, if `expected` is a dict, then `actual_result` is a dict[str, DebuggerExpectMatch],
Otherwise, `actual_result` is a str.
+ Works on a deep copy of the provided match_context, leaving the original untouched; if this match is selected,
+ then its updated copy should be committed back to the original context.
"""
- def __init__(self, expect: Expect, expected, actual: Optional[ValueIR]):
+ def __init__(
+ self,
+ expect: Expect,
+ expected,
+ actual: Optional[ValueIR],
+ match_context: ExpectMatchContext,
+ ):
self.expect = expect
self.expected = expected
self.actual = actual
+ self.match_context = copy.deepcopy(match_context)
self.actual_result, self.match_result = self._get_actual_result()
self.match_distance = self._get_match_distance()
@@ -93,20 +139,18 @@ def _get_actual_result(
)
)
sub_expect_results[sub_expect] = DebuggerExpectMatch(
- self.expect, sub_expected, value
+ self.expect, sub_expected, value, self.match_context
)
- if all(
- result.match_result == MatchResult.TRUE
- for result in sub_expect_results.values()
- ):
- match_result = MatchResult.TRUE
- elif all(
- result.match_result == MatchResult.FALSE
- for result in sub_expect_results.values()
- ):
- match_result = MatchResult.FALSE
- else:
- match_result = MatchResult.PARTIAL
+ match_result = MatchResult.from_bools(
+ any(
+ result.match_result == MatchResult.TRUE
+ for result in sub_expect_results.values()
+ ),
+ any(
+ result.match_result == MatchResult.FALSE
+ for result in sub_expect_results.values()
+ ),
+ )
return sub_expect_results, match_result
actual_result = (
@@ -114,11 +158,32 @@ def _get_actual_result(
if self.actual is not None
else None
)
- match_result = (
- MatchResult.TRUE
- if (self.expected is not None and str(self.expected) == actual_result)
- else MatchResult.FALSE
- )
+ if self.expected is None or actual_result is None:
+ return actual_result, MatchResult.FALSE
+ if isinstance(self.expected, Address):
+ # First check whether the actual value we have is an address.
+ try:
+ actual_addr = int(actual_result.split(maxsplit=1)[0], 16)
+ except ValueError:
+ # Not a valid address, so we can't match.
+ return actual_result, MatchResult.FALSE
+ # If the address is already resolved, we just have to see if it matches.
+ if (
+ resolved_addr := self.match_context.address_label_resolutions.get(
+ self.expected.name
+ )
+ ) is not None:
+ return actual_result, MatchResult.from_bools(
+ resolved_addr + self.expected.offset == actual_addr
+ )
+ # If the address is not resolved, then we can assign to it now in our local copy.
+ resolved_addr = actual_addr - self.expected.offset
+ self.match_context.address_label_resolutions[
+ self.expected.name
+ ] = resolved_addr
+ return actual_result, MatchResult.TRUE
+
+ match_result = MatchResult.from_bools(str(self.expected) == actual_result)
return actual_result, match_result
def _get_match_distance(self) -> float:
@@ -184,20 +249,26 @@ def colorize(input: str, match_result: MatchResult) -> str:
return f"{{ {', '.join(sub_values)} }}"
-def get_expect_match(expect: Expect, expected_values, actual: ValueIR):
+def get_expect_match(
+ expect: Expect, expected_values, actual: ValueIR, match_context: ExpectMatchContext
+):
"""Given one or more expected values for an Expect node and an actual ValueIR, returns a match for the first
matching expected values, or for None if there are no matching expected values."""
if not isinstance(expected_values, list):
expected_values = [expected_values]
- best_partial_match = DebuggerExpectMatch(expect, None, actual)
- best_partial_match_dist = 1.0
+ best_match = DebuggerExpectMatch(expect, None, actual, match_context)
+ best_match_dist = 1.0
for expected_value in expected_values:
- expect_match = DebuggerExpectMatch(expect, expected_value, actual)
+ expect_match = DebuggerExpectMatch(
+ expect, expected_value, actual, match_context
+ )
if expect_match.match_result == MatchResult.TRUE:
- return expect_match
+ best_match = expect_match
+ break
# A "FALSE" match will have a match distance of 1.0, and therefore will never be considered a "best match".
- if expect_match.match_distance < best_partial_match_dist:
- best_partial_match = expect_match
- best_partial_match_dist = expect_match.match_distance
+ if expect_match.match_distance < best_match_dist:
+ best_match = expect_match
+ best_match_dist = expect_match.match_distance
- return best_partial_match
+ match_context.commit(best_match.match_context)
+ return best_match
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
index a7112ce4c5b30..73b12a918bd5f 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/RunMatch.py
@@ -16,6 +16,7 @@
from dex.dextIR import DextIR, StepIR
from dex.evaluation.ExpectMatch import (
DebuggerExpectMatch,
+ ExpectMatchContext,
MatchResult,
get_expect_match,
)
@@ -28,15 +29,17 @@
from dex.test_script import DexterScript, Scope
from dex.test_script.Nodes import Expect, Value
-
class DebuggerStepMatch:
"""Class used to record the match between a DexterScript and a StepIR, including the state match, determining which
script nodes are "active", and the expect matches, which compare the debugger's output to the DexterScript's
expected output."""
- def __init__(self, step: StepIR, script: DexterScript):
+ def __init__(
+ self, step: StepIR, script: DexterScript, match_context: ExpectMatchContext
+ ):
self.step = step
self.script = script
+ self.match_context = match_context
self.state_match = get_active_where_matches(script, step)
expects_to_match = {
expect
@@ -49,7 +52,10 @@ def add_expected_values(expect: Expect, expected_value: Any, scope: Scope):
assert isinstance(expect, Value), "Non-Value expects currently unsupported"
if expect in expects_to_match:
self.expect_matches[expect] = get_expect_match(
- expect, expected_value, step.watches[expect.get_watched_expr()]
+ expect,
+ expected_value,
+ step.watches[expect.get_watched_expr()],
+ self.match_context,
)
script.visit_script(visit_expect=add_expected_values)
@@ -62,8 +68,9 @@ class DebuggerRunMatch(object):
affect the match of another variable at step N+1, thus we go one step at a time.
"""
- def __init__(self, context, dext_ir: DextIR):
- self.context = context
+ def __init__(self, dex_context, dext_ir: DextIR):
+ self.dex_context = dex_context
+ self.match_context = ExpectMatchContext()
self.dext_ir = dext_ir
self.metrics: Dict[str, Metric] = {}
self.step_matches: List[DebuggerStepMatch] = []
@@ -86,7 +93,9 @@ def add_expected_values(expect: Expect, expected_value: Any, scope: Scope):
# Then produce all of our step matches.
for step in self.dext_ir.steps:
- self.step_matches.append(DebuggerStepMatch(step, script))
+ self.step_matches.append(
+ DebuggerStepMatch(step, script, self.match_context)
+ )
# Then, for each expect, produce the list of results for just that variable.
for step_match in self.step_matches:
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
index a690fcd98ec1b..6d9f4f8ef5080 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Nodes.py
@@ -24,6 +24,7 @@ def setup_yaml_parser(loader):
DexRange,
Label,
Then,
+ Address,
]
for c in reg_classes:
c.register_yaml(loader)
@@ -236,6 +237,51 @@ def register_yaml(loader):
## Utility Nodes: Can be used anywhere in a script as a form of syntactic sugar.
+class Address:
+ """Named label for an address, which may resolve to different values with each test run, but will resolve
+ consistently within a test run."""
+
+ def __init__(self, name: str, offset: int):
+ self.name = name
+ self.offset = offset
+ if not re.match(r"^([a-zA-Z_]\w*)$", name):
+ raise DexterNodeError(self, f'Invalid !address identifier "{name}"')
+
+ def __repr__(self):
+ if not self.offset:
+ offset_str = ""
+ elif self.offset > 0:
+ offset_str = f" + {self.offset}"
+ else:
+ offset_str = f" - {-self.offset}"
+ return f"Address({self.name}{offset_str})"
+
+ @staticmethod
+ def constructor(loader, node):
+ address_str = str(loader.construct_scalar(node)).strip()
+ offset = 0
+ if match := re.match(r"^([a-zA-Z_]\w*)\s*([+-])\s*(\d+)$", address_str):
+ identifier, sign, number = match.groups()
+ offset = int(number) if sign == "+" else -int(number)
+ address_str = identifier
+ return Address(address_str, offset)
+
+ @staticmethod
+ def representer(dumper, data: "Address"):
+ if not data.offset:
+ offset_str = ""
+ elif data.offset > 0:
+ offset_str = f"+{data.offset}"
+ else:
+ offset_str = f"-{-data.offset}"
+ return dumper.represent_scalar("!address", data.name + offset_str)
+
+ @staticmethod
+ def register_yaml(loader):
+ yaml.add_constructor("!address", Address.constructor, loader)
+ yaml.add_representer(Address, Address.representer)
+
+
@dataclass(frozen=True)
class Line:
"""Union class between an int or a Label, used to represent lines inside of Nodes."""
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
new file mode 100644
index 0000000000000..17744db7a4711
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
@@ -0,0 +1,64 @@
+// RUN: %dexter_regression_test_cxx_build %s -o %t
+// RUN: %dexter_regression_test_run --use-script --binary %t -- %s | FileCheck %s
+
+// Test evaluation of !address nodes in Dexter.
+
+// CHECK: Non-matching nodes:
+// CHECK-SAME: Value(FalseStart)
+// CHECK: Non-matching nodes:
+// CHECK-SAME: Value(EvenFalserStart)
+// CHECK-NOT: Non-matching nodes
+
+// CHECK: total_watched_steps: 12
+// CHECK: correct_steps: 10
+// CHECK: incorrect_steps: 2
+// CHECK: missing_var_steps: 0
+// CHECK: unexpected_value_steps: 2
+// CHECK: seen_values: 11
+// CHECK: missing_values: 2
+
+struct SubRange {
+ char *Begin;
+ int Length;
+};
+
+int main() {
+ char Data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ char *Start = Data;
+ char *FalseStart = Data + 1;
+ char *EvenFalserStart = Data + 2;
+ char *Middle = Data + 5; // !dex_label begin
+ char *NearEnd = Data + 8;
+ char *Pos = Data + 4;
+ for (int I = 0; I < 6; ++I) {
+ Pos = Pos + 1; // !dex_label loop
+ }
+ SubRange Range = {Data + 2, 4};
+ return 0; // !dex_label ret
+}
+
+/*
+---
+# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is evaluated first.
+!where {lines: !label begin}:
+ !value Start: !address data
+ !value FalseStart: !address data
+# `EvenFalserStart` will also be incorrect, because it has been evaluated later.
+!where {lines: !label begin + 1}:
+ !value EvenFalserStart: !address data
+!where {lines: !label loop}:
+ !value Pos:
+ - !address data + 4
+ - !address data + 5
+ - !address data + 6
+ - !address data + 7
+ - !address data + 8
+ - !address data + 9
+!where {lines: !label ret}:
+ !value Middle: !address data + 5
+ !value NearEnd: !address end - 2
+ !value Range:
+ Begin: !address data + 2
+ Length: 4
+...
+*/
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
new file mode 100644
index 0000000000000..beae06b2eae0e
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/invalid-address.test
@@ -0,0 +1,26 @@
+RUN: not %dexter_regression_test_run --binary %s --use-script --skip-run -- %s 2>&1 | FileCheck %s
+
+Tests that we reject ill-formed addresses.
+
+CHECK: No valid Dexter script found in file
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(foo + bar): Invalid !address identifier "foo + bar"
+---
+!where {function: foo}:
+ !value a: !address foo + bar
+...
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(24): Invalid !address identifier "24"
+---
+!where {function: foo}:
+ !value a: !address 24
+...
+
+CHECK: Script starting line [[# @LINE + 2]]:
+CHECK: Error with node: Address(something something): Invalid !address identifier "something something"
+---
+!where {function: foo}:
+ !value a: !address something something
+...
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test
new file mode 100644
index 0000000000000..de9327b607a04
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/parser/parse-address.test
@@ -0,0 +1,15 @@
+RUN: %dexter_regression_test_run --binary %s --use-script --skip-run -- %s 2>&1 | FileCheck %s
+
+Tests that we can correctly parse+print !address nodes.
+
+CHECK: ? !where {function: foo}
+CHECK-NEXT: : !value 'a': !address 'foo'
+CHECK-NEXT: !value 'b': !address 'foo+1'
+CHECK-NEXT: !value 'c': !address '_bar-12'
+
+---
+!where {function: foo}:
+ !value a: !address foo
+ !value b: !address foo + 1
+ !value c: !address _bar -12
+...
>From 2d6964fc160dc8f82e8bcb2a17e095d1369ce1eb Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 10 Jun 2026 13:27:06 +0100
Subject: [PATCH 2/2] format
---
.../feature_tests/scripts/evaluation/eval_address.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
index 17744db7a4711..603497baa1cc5 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/evaluation/eval_address.cpp
@@ -1,5 +1,6 @@
// RUN: %dexter_regression_test_cxx_build %s -o %t
-// RUN: %dexter_regression_test_run --use-script --binary %t -- %s | FileCheck %s
+// RUN: %dexter_regression_test_run --use-script --binary %t -- %s \
+// RUN: | FileCheck %s
// Test evaluation of !address nodes in Dexter.
@@ -23,7 +24,7 @@ struct SubRange {
};
int main() {
- char Data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ char Data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
char *Start = Data;
char *FalseStart = Data + 1;
char *EvenFalserStart = Data + 2;
@@ -39,7 +40,8 @@ int main() {
/*
---
-# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is evaluated first.
+# `Start` will be correct and `FalseStart` will be incorrect, because `Start` is
+# evaluated first.
!where {lines: !label begin}:
!value Start: !address data
!value FalseStart: !address data
More information about the llvm-branch-commits mailing list