[Lldb-commits] [lldb] [lldb][bytecode] Add Python to formatter bytecode compiler (PR #113734)
Dave Lee via lldb-commits
lldb-commits at lists.llvm.org
Fri Mar 6 13:53:49 PST 2026
https://github.com/kastiglione updated https://github.com/llvm/llvm-project/pull/113734
>From 0f1c5ff8b0556d8e7e69f3ec9c6a71784304a2b1 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Fri, 25 Oct 2024 12:56:00 -0700
Subject: [PATCH 1/8] [lldb] Proof of concept data formatter compiler for
Python
---
.../formatter-bytecode/optional_summary.py | 14 ++
.../formatter-bytecode/python_to_assembly.py | 145 ++++++++++++++++++
2 files changed, 159 insertions(+)
create mode 100644 lldb/examples/formatter-bytecode/optional_summary.py
create mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py
diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py
new file mode 100644
index 0000000000000..68e672d86613d
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/optional_summary.py
@@ -0,0 +1,14 @@
+def OptionalSummaryProvider(valobj, _):
+ failure = 2
+ storage = valobj.GetChildMemberWithName("Storage")
+ hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure)
+ if hasVal == failure:
+ return "<could not read Optional>"
+
+ if hasVal == 0:
+ return "None"
+
+ underlying_type = storage.GetType().GetTemplateArgumentType(0)
+ value = storage.GetChildMemberWithName("value")
+ value = value.Cast(underlying_type)
+ return value.GetSummary()
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
new file mode 100755
index 0000000000000..6e2adbe093fda
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -0,0 +1,145 @@
+#!/usr/bin/python3
+
+import ast
+import io
+import sys
+from typing import Any
+
+BUILTINS = {
+ "Cast": "@cast",
+ "GetChildMemberWithName": "@get_child_with_name",
+ "GetSummary": "@get_summary",
+ "GetTemplateArgumentType": "@get_template_argument_type",
+ "GetType": "@get_type",
+ "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+COMPS = {
+ ast.Eq: "=",
+ ast.NotEq: "!=",
+ ast.Lt: "<",
+ ast.LtE: "=<",
+ ast.Gt: ">",
+ ast.GtE: "=>",
+}
+
+class Compiler(ast.NodeVisitor):
+ # Track the stack index of locals variables.
+ #
+ # This is essentially an ordered dictionary, where the key is an index on
+ # the stack, and the value is the name of the variable whose value is at
+ # that index.
+ #
+ # Ex: `locals[0]` is the name of the first value pushed on the stack, etc.
+ locals: list[str]
+
+ buffer: io.StringIO
+ final_buffer: io.StringIO
+
+ def __init__(self) -> None:
+ self.locals = []
+ self.buffer = io.StringIO()
+ self.final_buffer = io.StringIO()
+
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+ # Initialize `locals` with the (positional) arguments.
+ self.locals = [arg.arg for arg in node.args.args]
+ self.generic_visit(node)
+ self.locals.clear()
+
+ def visit_Compare(self, node: ast.Compare) -> None:
+ self.visit(node.left)
+ # XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
+ self.visit(node.comparators[0])
+ self._output(COMPS[type(node.ops[0])])
+
+ def visit_If(self, node: ast.If) -> None:
+ self.visit(node.test)
+
+ # Does the body `return`?
+ has_return = any(isinstance(x, ast.Return) for x in node.body)
+
+ self._output("{")
+ self._visit_each(node.body)
+ if not node.orelse and not has_return:
+ # No else, and no early exit: a simple `if`
+ self._output("} if")
+ return
+
+ self._output("}")
+ if node.orelse:
+ # Handle else.
+ self._output("{")
+ self._visit_each(node.orelse)
+ self._output("} ifelse")
+ elif has_return:
+ # Convert early exit into an `ifelse`.
+ self._output("{")
+ self._output("} ifelse", final=True)
+
+ def visit_Constant(self, node: ast.Constant) -> None:
+ if isinstance(node.value, str):
+ self._output(f'"{node.value}"')
+ elif isinstance(node.value, bool):
+ self._output(int(node.value))
+ else:
+ self._output(node.value)
+
+ def visit_Call(self, node: ast.Call) -> None:
+ if isinstance(node.func, ast.Attribute):
+ # The receiver is the left hande side of the dot.
+ receiver = node.func.value
+ method = node.func.attr
+ if selector := BUILTINS.get(method):
+ # Visit the method's receiver to have its value on the stack.
+ self.visit(receiver)
+ # Visit the args to position them on the stack.
+ self._visit_each(node.args)
+ self._output(f"{selector} call")
+ else:
+ # TODO: fail
+ print(f"error: unsupported method {node.func.attr}", file=sys.stderr)
+
+ def visit_Assign(self, node: ast.Assign) -> None:
+ # Visit RHS first, putting values on the stack.
+ self.visit(node.value)
+ # Determine the name(s). Either a single Name, or a Tuple of Names.
+ target = node.targets[0]
+ if isinstance(target, ast.Name):
+ names = [target.id]
+ elif isinstance(target, ast.Tuple):
+ # These tuple elements are Name nodes.
+ names = [x.id for x in target.elts]
+
+ # Forget any previous bindings of these names.
+ # Their values are orphaned on the stack.
+ for local in self.locals:
+ if local in names:
+ old_idx = self.locals.index(local)
+ self.locals[old_idx] = ""
+
+ self.locals.extend(names)
+
+ def visit_Name(self, node: ast.Name) -> None:
+ idx = self.locals.index(node.id)
+ self._output(f"{idx} pick # {node.id}")
+
+ def _visit_each(self, nodes: list[ast.AST]) -> None:
+ for child in nodes:
+ self.visit(child)
+
+ def _output(self, x: Any, final: bool = False) -> None:
+ dest = self.final_buffer if final else self.buffer
+ print(x, file=dest)
+
+ @property
+ def output(self) -> str:
+ return compiler.buffer.getvalue() + compiler.final_buffer.getvalue()
+
+
+if __name__ == "__main__":
+ with open(sys.argv[1]) as f:
+ root = ast.parse(f.read())
+ compiler = Compiler()
+ compiler.visit(root)
+ print(compiler.output)
>From c8525de8f369b99af869ce22170111ba0fea9b70 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Fri, 3 Jan 2025 14:20:48 -0800
Subject: [PATCH 2/8] Support the return operation
---
.../formatter-bytecode/python_to_assembly.py | 32 +++++++------------
1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
index 6e2adbe093fda..98c03832227cc 100755
--- a/lldb/examples/formatter-bytecode/python_to_assembly.py
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -34,12 +34,10 @@ class Compiler(ast.NodeVisitor):
locals: list[str]
buffer: io.StringIO
- final_buffer: io.StringIO
def __init__(self) -> None:
self.locals = []
self.buffer = io.StringIO()
- self.final_buffer = io.StringIO()
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
# Initialize `locals` with the (positional) arguments.
@@ -56,26 +54,19 @@ def visit_Compare(self, node: ast.Compare) -> None:
def visit_If(self, node: ast.If) -> None:
self.visit(node.test)
- # Does the body `return`?
- has_return = any(isinstance(x, ast.Return) for x in node.body)
-
self._output("{")
self._visit_each(node.body)
- if not node.orelse and not has_return:
- # No else, and no early exit: a simple `if`
- self._output("} if")
- return
-
- self._output("}")
if node.orelse:
- # Handle else.
- self._output("{")
+ self._output("} {")
self._visit_each(node.orelse)
self._output("} ifelse")
- elif has_return:
- # Convert early exit into an `ifelse`.
- self._output("{")
- self._output("} ifelse", final=True)
+ else:
+ self._output("} if")
+
+ def visit_Return(self, node: ast.Return) -> None:
+ if node.value:
+ self.visit(node.value)
+ self._output("return")
def visit_Constant(self, node: ast.Constant) -> None:
if isinstance(node.value, str):
@@ -128,13 +119,12 @@ def _visit_each(self, nodes: list[ast.AST]) -> None:
for child in nodes:
self.visit(child)
- def _output(self, x: Any, final: bool = False) -> None:
- dest = self.final_buffer if final else self.buffer
- print(x, file=dest)
+ def _output(self, x: Any) -> None:
+ print(x, file=self.buffer)
@property
def output(self) -> str:
- return compiler.buffer.getvalue() + compiler.final_buffer.getvalue()
+ return compiler.buffer.getvalue()
if __name__ == "__main__":
>From c3314558a831ec7c906a3f63616a3cad4fd1ecad Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Tue, 6 Jan 2026 10:49:21 -0800
Subject: [PATCH 3/8] Add Python bytecode translator
---
.../bytecode_to_bytecode.py | 141 ++++++++++++++++++
1 file changed, 141 insertions(+)
create mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
new file mode 100755
index 0000000000000..89227b094957c
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
@@ -0,0 +1,141 @@
+#!/usr/bin/python3
+
+import dis
+import sys
+from types import CodeType
+from typing import Iterable, Iterator, cast
+
+
+# TODO: strlen, fmt
+_SELECTORS = {
+ "Cast": "@cast",
+ "GetChildAtIndex": "@get_child_at_index",
+ "GetChildIndex": "@get_child_index",
+ "GetChildMemberWithName": "@get_child_with_name",
+ "GetNumChildren": "@get_num_children",
+ "GetSummary": "@summary",
+ "GetTemplateArgumentType": "@get_template_argument_type",
+ "GetType": "@get_type",
+ "GetValue": "@get_value",
+ "GetValueAsAddress": "@get_value_as_address",
+ "GetValueAsSigned": "@get_value_as_signed",
+ "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+
+def _main(source_file):
+ with open(source_file) as f:
+ source_code = f.read()
+ bytecode = dis.Bytecode(source_code)
+ for func_body in _function_bodies(bytecode):
+ instructions = dis.get_instructions(func_body)
+ for op in _translate(instructions):
+ print(op)
+
+
+def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]:
+ """
+ Iterate the function bodies (code object children) of the given Bytecode.
+ """
+ for const in bytecode.codeobj.co_consts:
+ if hasattr(const, "co_code"):
+ yield const
+
+
+def _translate(instructions: Iterator[dis.Instruction]) -> list[str]:
+ """
+ Convert Python instructions to LLDB data formatter bytecode operations.
+ """
+ result = []
+ _translate_list(list(instructions), result)
+ return result
+
+
+def _translate_list(instructions: list[dis.Instruction], result: list[str]):
+ """
+ Convert sequences of Python bytecode to sequences of LLDB data formatter
+ bytecode.
+
+ This function performs coarse-grained translations - sequences of input to
+ sequences of output. For translations of individual instructions, see
+ `_translate_instruction`.
+ """
+ while instructions:
+ inst = instructions.pop(0)
+ op = inst.opname
+ if op == "LOAD_METHOD":
+ # Method call sequences begin with a LOAD_METHOD instruction, then
+ # load the arguments on to the stack, and end with the CALL_METHOD
+ # instruction.
+ if selector := _SELECTORS.get(inst.argval):
+ while instructions:
+ if instructions[0] == "LOAD_METHOD":
+ # Begin a nested method call.
+ _translate_list(instructions, result)
+ else:
+ # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences
+ # contain flow control? If so this needs to gather
+ # instructions and call `_translate_list`, instead of
+ # handling each instruction individually.
+ x = instructions.pop(0)
+ if x.opname != "CALL_METHOD":
+ result.append(_translate_instruction(x))
+ else:
+ result.append(f"{selector} call")
+ break
+ elif op == "POP_JUMP_IF_FALSE":
+ # Convert to an `{ ... } if` sequence.
+ result.append("{")
+ offset = cast(int, inst.arg)
+ idx = _index_of_offset(instructions, offset)
+ # Split the conditional block prefix from the remaining instructions.
+ block = instructions[:idx]
+ del instructions[:idx]
+ _translate_list(block, result)
+ result.append("} if")
+ else:
+ result.append(_translate_instruction(inst))
+
+
+def _translate_instruction(inst: dis.Instruction) -> str:
+ """
+ Convert a single Python bytecode instruction to an LLDB data formatter
+ bytecode operation.
+
+ This function performs one-to-one translations. For translations of
+ sequences of instructions, see `_translate_list`.
+ """
+ op = inst.opname
+ if op == "COMPARE_OP":
+ if inst.argval == "==":
+ return "="
+ elif op == "LOAD_CONST":
+ if isinstance(inst.argval, str):
+ # TODO: Handle strings with inner double quotes ("). Alternatively,
+ # use `repr()` and allow the bytecode assembly to use single quotes.
+ return f'"{inst.argval}"'
+ elif isinstance(inst.argval, bool):
+ num = int(inst.argval)
+ return f"{num}"
+ else:
+ return inst.argrepr
+ elif op == "LOAD_FAST":
+ return f"{inst.arg} pick # {inst.argval}"
+ elif op == "RETURN_VALUE":
+ return "return"
+ elif op in ("STORE_FAST", "STORE_NAME"):
+ # This is fake. There is no `put` operation (yet?).
+ return f"{inst.arg} put # {inst.argval}"
+ return op
+
+
+def _index_of_offset(instructions: list[dis.Instruction], offset) -> int:
+ """Find the index of the instruction having the given offset."""
+ for i, inst in enumerate(instructions):
+ if inst.offset == offset:
+ return i
+ raise ValueError(f"invalid offset: {offset}")
+
+
+if __name__ == "__main__":
+ _main(sys.argv[1])
>From 4df27154518ab326306d09a2de14abb7e85cb7e8 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Tue, 3 Mar 2026 10:55:06 -0800
Subject: [PATCH 4/8] Add initial support for compiling synthetic formatter
classes
---
.../formatter-bytecode/python_to_assembly.py | 266 +++++++++++++++---
1 file changed, 225 insertions(+), 41 deletions(-)
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
index 98c03832227cc..693a3dabb6348 100755
--- a/lldb/examples/formatter-bytecode/python_to_assembly.py
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -3,12 +3,15 @@
import ast
import io
import sys
-from typing import Any
+from copy import copy
+from typing import Any, Optional, Sequence, Union, cast
BUILTINS = {
"Cast": "@cast",
+ "GetChildAtIndex": "@get_child_at_index",
"GetChildMemberWithName": "@get_child_with_name",
- "GetSummary": "@get_summary",
+ "GetSummary": "@summary",
+ "GetSyntheticValue": "@get_synthetic_value",
"GetTemplateArgumentType": "@get_template_argument_type",
"GetType": "@get_type",
"GetValueAsUnsigned": "@get_value_as_unsigned",
@@ -23,26 +26,115 @@
ast.GtE: "=>",
}
+# Maps Python method names in a formatter class to their bytecode signatures.
+METHOD_SIGS = {
+ "__init__": "@init",
+ "update": "@update",
+ "num_children": "@get_num_children",
+ "get_child_index": "@get_child_index",
+ "get_child_at_index": "@get_child_at_index",
+ "get_value": "@get_value",
+}
+
+
+class CompilerError(Exception):
+ lineno: int
+
+ def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None:
+ super().__init__(message)
+ self.lineno = node.lineno
+
+
class Compiler(ast.NodeVisitor):
- # Track the stack index of locals variables.
- #
- # This is essentially an ordered dictionary, where the key is an index on
- # the stack, and the value is the name of the variable whose value is at
- # that index.
- #
- # Ex: `locals[0]` is the name of the first value pushed on the stack, etc.
+ # Names of locals in bottom-to-top stack order. locals[0] is the
+ # oldest/deepest; locals[-1] is the most recently pushed.
locals: list[str]
+ # Names of visible attrs in bottom-to-top stack order. Always holds the
+ # full combined frame for the method being compiled: grows incrementally
+ # during __init__/update, and is set to the combined list before getter
+ # methods are compiled.
+ attrs: list[str]
+
+ # Temporaries currently on the stack above the locals/attrs frame.
+ # Always 0 at statement boundaries.
+ num_temps: int
+
+ # Bytecode signature of the method being compiled, or None for top-level
+ # functions.
+ current_sig: Optional[str]
+
buffer: io.StringIO
def __init__(self) -> None:
self.locals = []
+ self.attrs = []
+ self.num_temps = 0
+ self.current_sig = None
self.buffer = io.StringIO()
+ def compile(self, source_file: str) -> str:
+ with open(source_file) as f:
+ root = ast.parse(f.read())
+ self.visit(root)
+ return self.buffer.getvalue()
+
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
+ # Compile methods in a fixed order so that attrs is fully populated
+ # before getter methods are compiled.
+ methods = {}
+ for item in node.body:
+ if isinstance(item, ast.FunctionDef):
+ if item.name not in METHOD_SIGS:
+ raise CompilerError(f"unsupported method: {item.name}", item)
+ methods[item.name] = item
+
+ self.attrs = []
+ if method := methods.get("__init__"):
+ self._compile_method(method)
+ # self.attrs now holds init's attrs. update's attrs are appended above
+ # them, so after update self.attrs is the combined init+update list.
+ if method := methods.get("update"):
+ self._compile_method(method)
+
+ for method_name, method in methods.items():
+ if method_name not in ("__init__", "update"):
+ self._compile_method(method)
+
+ def _compile_method(self, node: ast.FunctionDef) -> None:
+ self.current_sig = METHOD_SIGS[node.name]
+ self.num_temps = 0
+
+ # Strip 'self' (and 'internal_dict' for __init__) from the arg list;
+ # the remaining args become the initial locals.
+ args = copy(node.args.args)
+ args.pop(0) # drop 'self'
+ if node.name == "__init__":
+ args.pop() # drop trailing 'internal_dict'
+
+ self.locals = [arg.arg for arg in args]
+
+ # Compile into a temporary buffer so the signature line can be
+ # emitted first.
+ saved_buffer = self.buffer
+ self.buffer = io.StringIO()
+
+ self._visit_each(node.body)
+
+ method_output = self.buffer.getvalue()
+ self.buffer = saved_buffer
+ self._output(f"@{self.current_sig}:")
+ self._output(method_output)
+
+ self.locals.clear()
+ self.current_sig = None
+
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
- # Initialize `locals` with the (positional) arguments.
+ # Top-level function (not inside a class).
+ self.current_sig = None
+ self.attrs = []
self.locals = [arg.arg for arg in node.args.args]
- self.generic_visit(node)
+ self._visit_each(node.body)
self.locals.clear()
def visit_Compare(self, node: ast.Compare) -> None:
@@ -50,13 +142,18 @@ def visit_Compare(self, node: ast.Compare) -> None:
# XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
self.visit(node.comparators[0])
self._output(COMPS[type(node.ops[0])])
+ # The comparison consumes two values and produces one.
+ self.num_temps -= 1
def visit_If(self, node: ast.If) -> None:
self.visit(node.test)
+ # `if`/`ifelse` consumes the condition.
+ self.num_temps = 0
self._output("{")
self._visit_each(node.body)
if node.orelse:
+ self.num_temps = 0
self._output("} {")
self._visit_each(node.orelse)
self._output("} ifelse")
@@ -64,6 +161,7 @@ def visit_If(self, node: ast.If) -> None:
self._output("} if")
def visit_Return(self, node: ast.Return) -> None:
+ self.num_temps = 0
if node.value:
self.visit(node.value)
self._output("return")
@@ -75,61 +173,147 @@ def visit_Constant(self, node: ast.Constant) -> None:
self._output(int(node.value))
else:
self._output(node.value)
+ self.num_temps += 1
def visit_Call(self, node: ast.Call) -> None:
- if isinstance(node.func, ast.Attribute):
- # The receiver is the left hande side of the dot.
- receiver = node.func.value
- method = node.func.attr
+ func = node.func
+ if isinstance(func, ast.Attribute):
+ receiver = func.value
+ method = func.attr
+ # self is not a valid call receiver.
+ if isinstance(receiver, ast.Name) and receiver.id == "self":
+ raise CompilerError(
+ "self is not a valid call receiver; use self.attr to read an attribute",
+ node,
+ )
if selector := BUILTINS.get(method):
- # Visit the method's receiver to have its value on the stack.
self.visit(receiver)
- # Visit the args to position them on the stack.
self._visit_each(node.args)
self._output(f"{selector} call")
- else:
- # TODO: fail
- print(f"error: unsupported method {node.func.attr}", file=sys.stderr)
+ # `call` pops the receiver and all args, and pushes one result.
+ self.num_temps -= len(node.args)
+ return
+ raise CompilerError(f"unsupported method: {method}", node)
+
+ if isinstance(func, ast.Name):
+ raise CompilerError(f"unsupported function: {func.id}", node)
+
+ raise CompilerError("unsupported function call expression", node)
def visit_Assign(self, node: ast.Assign) -> None:
- # Visit RHS first, putting values on the stack.
- self.visit(node.value)
- # Determine the name(s). Either a single Name, or a Tuple of Names.
+ self.num_temps = 0
+
target = node.targets[0]
+
+ # Handle self.attr = expr (attribute assignment).
+ if (
+ isinstance(target, ast.Attribute)
+ and isinstance(target.value, ast.Name)
+ and target.value.id == "self"
+ ):
+ if self.current_sig not in ("@init", "@update"):
+ raise CompilerError(
+ "attribute assignment is only allowed in __init__ and update",
+ node,
+ )
+
+ attr = target.attr
+ if attr in self.attrs:
+ raise CompilerError(f"attribute '{attr}' is already assigned", node)
+
+ # If the RHS is an argument (the only kind of local permitted in
+ # __init__) - then it is already on the stack in place, and no
+ # evaluation is needed.
+ is_arg = (
+ isinstance(node.value, ast.Name)
+ and self._local_index(node.value) is not None
+ )
+ if not is_arg:
+ # Evaluate the RHS, leaving its value on the stack.
+ self.visit(node.value)
+
+ # Record the attr.
+ self.attrs.append(attr)
+ return
+
+ # Handle local variable assignment.
+ if self.current_sig in ("@init", "@update"):
+ raise CompilerError(
+ "local variable assignment is not allowed in __init__ or update; "
+ "use attribute assignment (self.attr = ...) instead",
+ node,
+ )
+
+ # Visit RHS, leaving its value on the stack.
+ self.visit(node.value)
if isinstance(target, ast.Name):
- names = [target.id]
+ names = [target]
elif isinstance(target, ast.Tuple):
- # These tuple elements are Name nodes.
- names = [x.id for x in target.elts]
+ names = cast(list[ast.Name], target.elts)
+ else:
+ names = []
# Forget any previous bindings of these names.
# Their values are orphaned on the stack.
- for local in self.locals:
- if local in names:
- old_idx = self.locals.index(local)
- self.locals[old_idx] = ""
+ for name in names:
+ idx = self._local_index(name)
+ if idx is not None:
+ self.locals[idx] = ""
- self.locals.extend(names)
+ self.locals.extend(x.id for x in names)
+
+ def visit_Attribute(self, node: ast.Attribute) -> None:
+ # Only self.attr reads are supported here.
+ if not (isinstance(node.value, ast.Name) and node.value.id == "self"):
+ raise CompilerError(
+ "unsupported attribute access (only self.attr is supported)", node
+ )
+ attr_idx = self._attr_index(node.attr, node)
+ pick_idx = self.num_temps + attr_idx
+ self._output(f"{pick_idx} pick # self.{node.attr}")
+ self.num_temps += 1
def visit_Name(self, node: ast.Name) -> None:
- idx = self.locals.index(node.id)
+ idx = self._stack_index(node)
+ if idx is None:
+ raise CompilerError(f"unknown local variable: {node.id}", node)
self._output(f"{idx} pick # {node.id}")
+ self.num_temps += 1
- def _visit_each(self, nodes: list[ast.AST]) -> None:
+ def _visit_each(self, nodes: Sequence[ast.AST]) -> None:
for child in nodes:
self.visit(child)
+ def _attr_index(self, name: str, node: ast.expr) -> int:
+ # self.attrs is always the full visible attr frame, so the index is
+ # the direct pick offset with no further adjustment.
+ try:
+ return self.attrs.index(name)
+ except ValueError:
+ raise CompilerError(f"unknown attribute: {name}", node)
+
+ def _stack_index(self, name: ast.Name) -> Optional[int]:
+ # Offset past all attrs and any in-flight temporaries.
+ idx = self._local_index(name)
+ if idx is None:
+ return None
+ return len(self.attrs) + idx + self.num_temps
+
+ def _local_index(self, name: ast.Name) -> Optional[int]:
+ try:
+ return self.locals.index(name.id)
+ except ValueError:
+ return None
+
def _output(self, x: Any) -> None:
print(x, file=self.buffer)
- @property
- def output(self) -> str:
- return compiler.buffer.getvalue()
-
if __name__ == "__main__":
- with open(sys.argv[1]) as f:
- root = ast.parse(f.read())
+ source_file = sys.argv[1]
compiler = Compiler()
- compiler.visit(root)
- print(compiler.output)
+ try:
+ output = compiler.compile(source_file)
+ print(output)
+ except CompilerError as e:
+ print(f"{source_file}:{e.lineno}: {e}", file=sys.stderr)
>From 08646420e365c811ca060bf7139c40398a024b74 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Tue, 3 Mar 2026 14:51:34 -0800
Subject: [PATCH 5/8] Fix unhandled cases in visit_Assign
---
lldb/examples/formatter-bytecode/python_to_assembly.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
index 693a3dabb6348..2695c6ea075f1 100755
--- a/lldb/examples/formatter-bytecode/python_to_assembly.py
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -244,14 +244,15 @@ def visit_Assign(self, node: ast.Assign) -> None:
node,
)
- # Visit RHS, leaving its value on the stack.
- self.visit(node.value)
if isinstance(target, ast.Name):
names = [target]
elif isinstance(target, ast.Tuple):
names = cast(list[ast.Name], target.elts)
else:
- names = []
+ raise CompilerError("unsupported assignment target", node)
+
+ # Visit RHS, leaving its value on the stack.
+ self.visit(node.value)
# Forget any previous bindings of these names.
# Their values are orphaned on the stack.
>From 081ac068fde264c21802fe25270d8b124fa6e5e4 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Fri, 6 Mar 2026 12:22:52 -0800
Subject: [PATCH 6/8] Move Python compiler into formatter_bytecode
---
lldb/examples/python/formatter_bytecode.py | 354 ++++++++++++++++++++-
1 file changed, 344 insertions(+), 10 deletions(-)
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index 471cb2c5b38ff..a188ee1819d24 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -17,9 +17,12 @@
import re
import io
+import ast
+import enum
import textwrap
+from copy import copy
from dataclasses import dataclass
-from typing import BinaryIO, TextIO, Tuple, Union
+from typing import Any, BinaryIO, Optional, Sequence, TextIO, Tuple, Union, cast
BINARY_VERSION = 1
@@ -173,22 +176,17 @@ def _segment_by_signature(input: list[str]) -> list[Tuple[str, list[str]]]:
signature = None
tokens = []
- def conclude_segment():
- if not tokens:
- raise ValueError(f"empty signature: {signature}")
- segments.append((signature, tokens))
-
for token in input:
if _SIGNATURE_LABEL.match(token):
if signature:
- conclude_segment()
+ segments.append((signature, tokens))
signature = token[1:-1] # strip leading @, trailing :
tokens = []
else:
tokens.append(token)
if signature:
- conclude_segment()
+ segments.append((signature, tokens))
return segments
@@ -295,7 +293,8 @@ def assemble_file(type_name: str, input: TextIO) -> BytecodeSection:
input_tokens = _tokenize(input.read())
signatures = []
for sig, tokens in _segment_by_signature(input_tokens):
- signatures.append((sig, assemble_tokens(tokens)))
+ if tokens:
+ signatures.append((sig, assemble_tokens(tokens)))
return BytecodeSection(type_name, flags=0, signatures=signatures)
@@ -662,6 +661,313 @@ def next_byte():
assert False
return data[-1]
+################################################################################
+# Python Compiler
+################################################################################
+
+_BUILTINS = {
+ "Cast": "@cast",
+ "GetChildAtIndex": "@get_child_at_index",
+ "GetChildMemberWithName": "@get_child_with_name",
+ "GetSummary": "@summary",
+ "GetSyntheticValue": "@get_synthetic_value",
+ "GetTemplateArgumentType": "@get_template_argument_type",
+ "GetType": "@get_type",
+ "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+_COMPS = {
+ ast.Eq: "=",
+ ast.NotEq: "!=",
+ ast.Lt: "<",
+ ast.LtE: "=<",
+ ast.Gt: ">",
+ ast.GtE: "=>",
+}
+
+# Maps Python method names in a formatter class to their bytecode signatures.
+_METHOD_SIGS = {
+ "__init__": "@init",
+ "update": "@update",
+ "num_children": "@get_num_children",
+ "get_child_index": "@get_child_index",
+ "get_child_at_index": "@get_child_at_index",
+ "get_value": "@get_value",
+}
+
+
+class CompilerError(Exception):
+ lineno: int
+
+ def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None:
+ super().__init__(message)
+ self.lineno = node.lineno
+
+
+class Compiler(ast.NodeVisitor):
+ # Names of locals in bottom-to-top stack order. locals[0] is the
+ # oldest/deepest; locals[-1] is the most recently pushed.
+ locals: list[str]
+
+ # Names of visible attrs in bottom-to-top stack order. Always holds the
+ # full combined frame for the method being compiled: grows incrementally
+ # during __init__/update, and is set to the combined list before getter
+ # methods are compiled.
+ attrs: list[str]
+
+ # Temporaries currently on the stack above the locals/attrs frame.
+ # Always 0 at statement boundaries.
+ num_temps: int
+
+ # Bytecode signature of the method being compiled, or None for top-level
+ # functions.
+ current_sig: Optional[str]
+
+ buffer: io.StringIO
+
+ def __init__(self) -> None:
+ self.locals = []
+ self.attrs = []
+ self.num_temps = 0
+ self.current_sig = None
+ self.buffer = io.StringIO()
+
+ def compile(self, source_file: str) -> str:
+ with open(source_file) as f:
+ root = ast.parse(f.read())
+ self.visit(root)
+ return self.buffer.getvalue()
+
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
+ # Compile methods in a fixed order so that attrs is fully populated
+ # before getter methods are compiled.
+ methods = {}
+ for item in node.body:
+ if isinstance(item, ast.FunctionDef):
+ if item.name not in _METHOD_SIGS:
+ raise CompilerError(f"unsupported method: {item.name}", item)
+ methods[item.name] = item
+
+ self.attrs = []
+ if method := methods.get("__init__"):
+ self._compile_method(method)
+ # self.attrs now holds init's attrs. update's attrs are appended above
+ # them, so after update self.attrs is the combined init+update list.
+ if method := methods.get("update"):
+ self._compile_method(method)
+
+ for method_name, method in methods.items():
+ if method_name not in ("__init__", "update"):
+ self._compile_method(method)
+
+ def _compile_method(self, node: ast.FunctionDef) -> None:
+ self.current_sig = _METHOD_SIGS[node.name]
+ self.num_temps = 0
+
+ # Strip 'self' (and 'internal_dict' for __init__) from the arg list;
+ # the remaining args become the initial locals.
+ args = copy(node.args.args)
+ args.pop(0) # drop 'self'
+ if node.name == "__init__":
+ args.pop() # drop trailing 'internal_dict'
+
+ self.locals = [arg.arg for arg in args]
+
+ # Compile into a temporary buffer so the signature line can be
+ # emitted first.
+ saved_buffer = self.buffer
+ self.buffer = io.StringIO()
+
+ self._visit_each(node.body)
+
+ method_output = self.buffer.getvalue()
+ self.buffer = saved_buffer
+ self._output(f"{self.current_sig}:")
+ self._output(method_output)
+
+ self.locals.clear()
+ self.current_sig = None
+
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+ # Top-level function (not inside a class).
+ self.current_sig = None
+ self.attrs = []
+ self.locals = [arg.arg for arg in node.args.args]
+ self._visit_each(node.body)
+ self.locals.clear()
+
+ def visit_Compare(self, node: ast.Compare) -> None:
+ self.visit(node.left)
+ # XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
+ self.visit(node.comparators[0])
+ self._output(_COMPS[type(node.ops[0])])
+ # The comparison consumes two values and produces one.
+ self.num_temps -= 1
+
+ def visit_If(self, node: ast.If) -> None:
+ self.visit(node.test)
+ # `if`/`ifelse` consumes the condition.
+ self.num_temps = 0
+
+ self._output("{")
+ self._visit_each(node.body)
+ if node.orelse:
+ self.num_temps = 0
+ self._output("} {")
+ self._visit_each(node.orelse)
+ self._output("} ifelse")
+ else:
+ self._output("} if")
+
+ def visit_Return(self, node: ast.Return) -> None:
+ self.num_temps = 0
+ if node.value:
+ self.visit(node.value)
+ self._output("return")
+
+ def visit_Constant(self, node: ast.Constant) -> None:
+ if isinstance(node.value, str):
+ self._output(f'"{node.value}"')
+ elif isinstance(node.value, bool):
+ self._output(int(node.value))
+ else:
+ self._output(node.value)
+ self.num_temps += 1
+
+ def visit_Call(self, node: ast.Call) -> None:
+ func = node.func
+ if isinstance(func, ast.Attribute):
+ receiver = func.value
+ method = func.attr
+ # self is not a valid call receiver.
+ if isinstance(receiver, ast.Name) and receiver.id == "self":
+ raise CompilerError(
+ "self is not a valid call receiver; use self.attr to read an attribute",
+ node,
+ )
+ if selector := _BUILTINS.get(method):
+ self.visit(receiver)
+ self._visit_each(node.args)
+ self._output(f"{selector} call")
+ # `call` pops the receiver and all args, and pushes one result.
+ self.num_temps -= len(node.args)
+ return
+ raise CompilerError(f"unsupported method: {method}", node)
+
+ if isinstance(func, ast.Name):
+ raise CompilerError(f"unsupported function: {func.id}", node)
+
+ raise CompilerError("unsupported function call expression", node)
+
+ def visit_Assign(self, node: ast.Assign) -> None:
+ self.num_temps = 0
+
+ target = node.targets[0]
+
+ # Handle self.attr = expr (attribute assignment).
+ if (
+ isinstance(target, ast.Attribute)
+ and isinstance(target.value, ast.Name)
+ and target.value.id == "self"
+ ):
+ if self.current_sig not in ("@init", "@update"):
+ raise CompilerError(
+ "attribute assignment is only allowed in __init__ and update",
+ node,
+ )
+
+ attr = target.attr
+ if attr in self.attrs:
+ raise CompilerError(f"attribute '{attr}' is already assigned", node)
+
+ # If the RHS is an argument (the only kind of local permitted in
+ # __init__), then it is already in place on the stack, and no
+ # evaluation is needed.
+ is_arg = (
+ isinstance(node.value, ast.Name)
+ and self._local_index(node.value) is not None
+ )
+ if not is_arg:
+ # Evaluate the RHS, leaving its value on the stack.
+ self.visit(node.value)
+
+ # Record the attr.
+ self.attrs.append(attr)
+ return
+
+ # Handle local variable assignment.
+ if self.current_sig in ("@init", "@update"):
+ raise CompilerError(
+ "local variable assignment is not allowed in __init__ or update; "
+ "use attribute assignment (self.attr = ...) instead",
+ node,
+ )
+
+ if isinstance(target, ast.Name):
+ names = [target]
+ elif isinstance(target, ast.Tuple):
+ names = cast(list[ast.Name], target.elts)
+ else:
+ raise CompilerError("unsupported assignment target", node)
+
+ # Visit RHS, leaving its value on the stack.
+ self.visit(node.value)
+
+ # Forget any previous bindings of these names.
+ # Their values are orphaned on the stack.
+ for name in names:
+ idx = self._local_index(name)
+ if idx is not None:
+ self.locals[idx] = ""
+
+ self.locals.extend(x.id for x in names)
+
+ def visit_Attribute(self, node: ast.Attribute) -> None:
+ # Only self.attr reads are supported here.
+ if not (isinstance(node.value, ast.Name) and node.value.id == "self"):
+ raise CompilerError(
+ "unsupported attribute access (only self.attr is supported)", node
+ )
+ attr_idx = self._attr_index(node.attr, node)
+ pick_idx = self.num_temps + attr_idx
+ self._output(f"{pick_idx} pick") # "# self.{node.attr}"
+ self.num_temps += 1
+
+ def visit_Name(self, node: ast.Name) -> None:
+ idx = self._stack_index(node)
+ if idx is None:
+ raise CompilerError(f"unknown local variable: {node.id}", node)
+ self._output(f"{idx} pick") # "# {node.id}"
+ self.num_temps += 1
+
+ def _visit_each(self, nodes: Sequence[ast.AST]) -> None:
+ for child in nodes:
+ self.visit(child)
+
+ def _attr_index(self, name: str, node: ast.expr) -> int:
+ # self.attrs is always the full visible attr frame, so the index is
+ # the direct pick offset with no further adjustment.
+ try:
+ return self.attrs.index(name)
+ except ValueError:
+ raise CompilerError(f"unknown attribute: {name}", node)
+
+ def _stack_index(self, name: ast.Name) -> Optional[int]:
+ # Offset past all attrs and any in-flight temporaries.
+ idx = self._local_index(name)
+ if idx is None:
+ return None
+ return len(self.attrs) + idx + self.num_temps
+
+ def _local_index(self, name: ast.Name) -> Optional[int]:
+ try:
+ return self.locals.index(name.id)
+ except ValueError:
+ return None
+
+ def _output(self, x: Any) -> None:
+ print(x, file=self.buffer)
+
################################################################################
# Helper functions.
@@ -717,6 +1023,12 @@ def _main():
mode = parser.add_mutually_exclusive_group()
mode.add_argument(
"-c",
+ "--compile",
+ action="store_true",
+ help="compile Python into bytecode",
+ )
+ mode.add_argument(
+ "-a",
"--assemble",
action="store_true",
help="assemble assembly into bytecode",
@@ -727,6 +1039,7 @@ def _main():
action="store_true",
help="disassemble bytecode",
)
+ parser.add_argument("-n", "--type-name", help="source type of formatter")
parser.add_argument(
"-o",
"--output",
@@ -742,7 +1055,28 @@ def _main():
parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
args = parser.parse_args()
- if args.assemble:
+ if args.compile:
+ if not args.type_name:
+ parser.error("--type-name is required with --compile")
+ if not args.output:
+ parser.error("--output is required with --compile")
+ compiler = Compiler()
+ try:
+ assembly = compiler.compile(args.input)
+ except CompilerError as e:
+ print(f"{args.input}:{e.lineno}: {e}", file=sys.stderr)
+ return
+
+ section = assemble_file(args.type_name, io.StringIO(assembly))
+ if args.format == "binary":
+ with open(args.output, "wb") as output:
+ section.write_binary(output)
+ else: # args.format == "c"
+ with open(args.output, "w") as output:
+ section.write_source(output)
+ elif args.assemble:
+ if not args.type_name:
+ parser.error("--type-name is required with --assemble")
if not args.output:
parser.error("--output is required with --assemble")
with open(args.input) as input:
>From e3cf78cd351304a0da41bd294592e47546123845 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Fri, 6 Mar 2026 13:05:48 -0800
Subject: [PATCH 7/8] Delete old files
---
.../bytecode_to_bytecode.py | 141 --------
.../formatter-bytecode/optional_summary.py | 14 -
.../formatter-bytecode/python_to_assembly.py | 320 ------------------
3 files changed, 475 deletions(-)
delete mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
delete mode 100644 lldb/examples/formatter-bytecode/optional_summary.py
delete mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py
diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
deleted file mode 100755
index 89227b094957c..0000000000000
--- a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/python3
-
-import dis
-import sys
-from types import CodeType
-from typing import Iterable, Iterator, cast
-
-
-# TODO: strlen, fmt
-_SELECTORS = {
- "Cast": "@cast",
- "GetChildAtIndex": "@get_child_at_index",
- "GetChildIndex": "@get_child_index",
- "GetChildMemberWithName": "@get_child_with_name",
- "GetNumChildren": "@get_num_children",
- "GetSummary": "@summary",
- "GetTemplateArgumentType": "@get_template_argument_type",
- "GetType": "@get_type",
- "GetValue": "@get_value",
- "GetValueAsAddress": "@get_value_as_address",
- "GetValueAsSigned": "@get_value_as_signed",
- "GetValueAsUnsigned": "@get_value_as_unsigned",
-}
-
-
-def _main(source_file):
- with open(source_file) as f:
- source_code = f.read()
- bytecode = dis.Bytecode(source_code)
- for func_body in _function_bodies(bytecode):
- instructions = dis.get_instructions(func_body)
- for op in _translate(instructions):
- print(op)
-
-
-def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]:
- """
- Iterate the function bodies (code object children) of the given Bytecode.
- """
- for const in bytecode.codeobj.co_consts:
- if hasattr(const, "co_code"):
- yield const
-
-
-def _translate(instructions: Iterator[dis.Instruction]) -> list[str]:
- """
- Convert Python instructions to LLDB data formatter bytecode operations.
- """
- result = []
- _translate_list(list(instructions), result)
- return result
-
-
-def _translate_list(instructions: list[dis.Instruction], result: list[str]):
- """
- Convert sequences of Python bytecode to sequences of LLDB data formatter
- bytecode.
-
- This function performs coarse-grained translations - sequences of input to
- sequences of output. For translations of individual instructions, see
- `_translate_instruction`.
- """
- while instructions:
- inst = instructions.pop(0)
- op = inst.opname
- if op == "LOAD_METHOD":
- # Method call sequences begin with a LOAD_METHOD instruction, then
- # load the arguments on to the stack, and end with the CALL_METHOD
- # instruction.
- if selector := _SELECTORS.get(inst.argval):
- while instructions:
- if instructions[0] == "LOAD_METHOD":
- # Begin a nested method call.
- _translate_list(instructions, result)
- else:
- # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences
- # contain flow control? If so this needs to gather
- # instructions and call `_translate_list`, instead of
- # handling each instruction individually.
- x = instructions.pop(0)
- if x.opname != "CALL_METHOD":
- result.append(_translate_instruction(x))
- else:
- result.append(f"{selector} call")
- break
- elif op == "POP_JUMP_IF_FALSE":
- # Convert to an `{ ... } if` sequence.
- result.append("{")
- offset = cast(int, inst.arg)
- idx = _index_of_offset(instructions, offset)
- # Split the conditional block prefix from the remaining instructions.
- block = instructions[:idx]
- del instructions[:idx]
- _translate_list(block, result)
- result.append("} if")
- else:
- result.append(_translate_instruction(inst))
-
-
-def _translate_instruction(inst: dis.Instruction) -> str:
- """
- Convert a single Python bytecode instruction to an LLDB data formatter
- bytecode operation.
-
- This function performs one-to-one translations. For translations of
- sequences of instructions, see `_translate_list`.
- """
- op = inst.opname
- if op == "COMPARE_OP":
- if inst.argval == "==":
- return "="
- elif op == "LOAD_CONST":
- if isinstance(inst.argval, str):
- # TODO: Handle strings with inner double quotes ("). Alternatively,
- # use `repr()` and allow the bytecode assembly to use single quotes.
- return f'"{inst.argval}"'
- elif isinstance(inst.argval, bool):
- num = int(inst.argval)
- return f"{num}"
- else:
- return inst.argrepr
- elif op == "LOAD_FAST":
- return f"{inst.arg} pick # {inst.argval}"
- elif op == "RETURN_VALUE":
- return "return"
- elif op in ("STORE_FAST", "STORE_NAME"):
- # This is fake. There is no `put` operation (yet?).
- return f"{inst.arg} put # {inst.argval}"
- return op
-
-
-def _index_of_offset(instructions: list[dis.Instruction], offset) -> int:
- """Find the index of the instruction having the given offset."""
- for i, inst in enumerate(instructions):
- if inst.offset == offset:
- return i
- raise ValueError(f"invalid offset: {offset}")
-
-
-if __name__ == "__main__":
- _main(sys.argv[1])
diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py
deleted file mode 100644
index 68e672d86613d..0000000000000
--- a/lldb/examples/formatter-bytecode/optional_summary.py
+++ /dev/null
@@ -1,14 +0,0 @@
-def OptionalSummaryProvider(valobj, _):
- failure = 2
- storage = valobj.GetChildMemberWithName("Storage")
- hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure)
- if hasVal == failure:
- return "<could not read Optional>"
-
- if hasVal == 0:
- return "None"
-
- underlying_type = storage.GetType().GetTemplateArgumentType(0)
- value = storage.GetChildMemberWithName("value")
- value = value.Cast(underlying_type)
- return value.GetSummary()
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
deleted file mode 100755
index 2695c6ea075f1..0000000000000
--- a/lldb/examples/formatter-bytecode/python_to_assembly.py
+++ /dev/null
@@ -1,320 +0,0 @@
-#!/usr/bin/python3
-
-import ast
-import io
-import sys
-from copy import copy
-from typing import Any, Optional, Sequence, Union, cast
-
-BUILTINS = {
- "Cast": "@cast",
- "GetChildAtIndex": "@get_child_at_index",
- "GetChildMemberWithName": "@get_child_with_name",
- "GetSummary": "@summary",
- "GetSyntheticValue": "@get_synthetic_value",
- "GetTemplateArgumentType": "@get_template_argument_type",
- "GetType": "@get_type",
- "GetValueAsUnsigned": "@get_value_as_unsigned",
-}
-
-COMPS = {
- ast.Eq: "=",
- ast.NotEq: "!=",
- ast.Lt: "<",
- ast.LtE: "=<",
- ast.Gt: ">",
- ast.GtE: "=>",
-}
-
-# Maps Python method names in a formatter class to their bytecode signatures.
-METHOD_SIGS = {
- "__init__": "@init",
- "update": "@update",
- "num_children": "@get_num_children",
- "get_child_index": "@get_child_index",
- "get_child_at_index": "@get_child_at_index",
- "get_value": "@get_value",
-}
-
-
-class CompilerError(Exception):
- lineno: int
-
- def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None:
- super().__init__(message)
- self.lineno = node.lineno
-
-
-class Compiler(ast.NodeVisitor):
- # Names of locals in bottom-to-top stack order. locals[0] is the
- # oldest/deepest; locals[-1] is the most recently pushed.
- locals: list[str]
-
- # Names of visible attrs in bottom-to-top stack order. Always holds the
- # full combined frame for the method being compiled: grows incrementally
- # during __init__/update, and is set to the combined list before getter
- # methods are compiled.
- attrs: list[str]
-
- # Temporaries currently on the stack above the locals/attrs frame.
- # Always 0 at statement boundaries.
- num_temps: int
-
- # Bytecode signature of the method being compiled, or None for top-level
- # functions.
- current_sig: Optional[str]
-
- buffer: io.StringIO
-
- def __init__(self) -> None:
- self.locals = []
- self.attrs = []
- self.num_temps = 0
- self.current_sig = None
- self.buffer = io.StringIO()
-
- def compile(self, source_file: str) -> str:
- with open(source_file) as f:
- root = ast.parse(f.read())
- self.visit(root)
- return self.buffer.getvalue()
-
- def visit_ClassDef(self, node: ast.ClassDef) -> None:
- # Compile methods in a fixed order so that attrs is fully populated
- # before getter methods are compiled.
- methods = {}
- for item in node.body:
- if isinstance(item, ast.FunctionDef):
- if item.name not in METHOD_SIGS:
- raise CompilerError(f"unsupported method: {item.name}", item)
- methods[item.name] = item
-
- self.attrs = []
- if method := methods.get("__init__"):
- self._compile_method(method)
- # self.attrs now holds init's attrs. update's attrs are appended above
- # them, so after update self.attrs is the combined init+update list.
- if method := methods.get("update"):
- self._compile_method(method)
-
- for method_name, method in methods.items():
- if method_name not in ("__init__", "update"):
- self._compile_method(method)
-
- def _compile_method(self, node: ast.FunctionDef) -> None:
- self.current_sig = METHOD_SIGS[node.name]
- self.num_temps = 0
-
- # Strip 'self' (and 'internal_dict' for __init__) from the arg list;
- # the remaining args become the initial locals.
- args = copy(node.args.args)
- args.pop(0) # drop 'self'
- if node.name == "__init__":
- args.pop() # drop trailing 'internal_dict'
-
- self.locals = [arg.arg for arg in args]
-
- # Compile into a temporary buffer so the signature line can be
- # emitted first.
- saved_buffer = self.buffer
- self.buffer = io.StringIO()
-
- self._visit_each(node.body)
-
- method_output = self.buffer.getvalue()
- self.buffer = saved_buffer
- self._output(f"@{self.current_sig}:")
- self._output(method_output)
-
- self.locals.clear()
- self.current_sig = None
-
- def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
- # Top-level function (not inside a class).
- self.current_sig = None
- self.attrs = []
- self.locals = [arg.arg for arg in node.args.args]
- self._visit_each(node.body)
- self.locals.clear()
-
- def visit_Compare(self, node: ast.Compare) -> None:
- self.visit(node.left)
- # XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
- self.visit(node.comparators[0])
- self._output(COMPS[type(node.ops[0])])
- # The comparison consumes two values and produces one.
- self.num_temps -= 1
-
- def visit_If(self, node: ast.If) -> None:
- self.visit(node.test)
- # `if`/`ifelse` consumes the condition.
- self.num_temps = 0
-
- self._output("{")
- self._visit_each(node.body)
- if node.orelse:
- self.num_temps = 0
- self._output("} {")
- self._visit_each(node.orelse)
- self._output("} ifelse")
- else:
- self._output("} if")
-
- def visit_Return(self, node: ast.Return) -> None:
- self.num_temps = 0
- if node.value:
- self.visit(node.value)
- self._output("return")
-
- def visit_Constant(self, node: ast.Constant) -> None:
- if isinstance(node.value, str):
- self._output(f'"{node.value}"')
- elif isinstance(node.value, bool):
- self._output(int(node.value))
- else:
- self._output(node.value)
- self.num_temps += 1
-
- def visit_Call(self, node: ast.Call) -> None:
- func = node.func
- if isinstance(func, ast.Attribute):
- receiver = func.value
- method = func.attr
- # self is not a valid call receiver.
- if isinstance(receiver, ast.Name) and receiver.id == "self":
- raise CompilerError(
- "self is not a valid call receiver; use self.attr to read an attribute",
- node,
- )
- if selector := BUILTINS.get(method):
- self.visit(receiver)
- self._visit_each(node.args)
- self._output(f"{selector} call")
- # `call` pops the receiver and all args, and pushes one result.
- self.num_temps -= len(node.args)
- return
- raise CompilerError(f"unsupported method: {method}", node)
-
- if isinstance(func, ast.Name):
- raise CompilerError(f"unsupported function: {func.id}", node)
-
- raise CompilerError("unsupported function call expression", node)
-
- def visit_Assign(self, node: ast.Assign) -> None:
- self.num_temps = 0
-
- target = node.targets[0]
-
- # Handle self.attr = expr (attribute assignment).
- if (
- isinstance(target, ast.Attribute)
- and isinstance(target.value, ast.Name)
- and target.value.id == "self"
- ):
- if self.current_sig not in ("@init", "@update"):
- raise CompilerError(
- "attribute assignment is only allowed in __init__ and update",
- node,
- )
-
- attr = target.attr
- if attr in self.attrs:
- raise CompilerError(f"attribute '{attr}' is already assigned", node)
-
- # If the RHS is an argument (the only kind of local permitted in
- # __init__) - then it is already on the stack in place, and no
- # evaluation is needed.
- is_arg = (
- isinstance(node.value, ast.Name)
- and self._local_index(node.value) is not None
- )
- if not is_arg:
- # Evaluate the RHS, leaving its value on the stack.
- self.visit(node.value)
-
- # Record the attr.
- self.attrs.append(attr)
- return
-
- # Handle local variable assignment.
- if self.current_sig in ("@init", "@update"):
- raise CompilerError(
- "local variable assignment is not allowed in __init__ or update; "
- "use attribute assignment (self.attr = ...) instead",
- node,
- )
-
- if isinstance(target, ast.Name):
- names = [target]
- elif isinstance(target, ast.Tuple):
- names = cast(list[ast.Name], target.elts)
- else:
- raise CompilerError("unsupported assignment target", node)
-
- # Visit RHS, leaving its value on the stack.
- self.visit(node.value)
-
- # Forget any previous bindings of these names.
- # Their values are orphaned on the stack.
- for name in names:
- idx = self._local_index(name)
- if idx is not None:
- self.locals[idx] = ""
-
- self.locals.extend(x.id for x in names)
-
- def visit_Attribute(self, node: ast.Attribute) -> None:
- # Only self.attr reads are supported here.
- if not (isinstance(node.value, ast.Name) and node.value.id == "self"):
- raise CompilerError(
- "unsupported attribute access (only self.attr is supported)", node
- )
- attr_idx = self._attr_index(node.attr, node)
- pick_idx = self.num_temps + attr_idx
- self._output(f"{pick_idx} pick # self.{node.attr}")
- self.num_temps += 1
-
- def visit_Name(self, node: ast.Name) -> None:
- idx = self._stack_index(node)
- if idx is None:
- raise CompilerError(f"unknown local variable: {node.id}", node)
- self._output(f"{idx} pick # {node.id}")
- self.num_temps += 1
-
- def _visit_each(self, nodes: Sequence[ast.AST]) -> None:
- for child in nodes:
- self.visit(child)
-
- def _attr_index(self, name: str, node: ast.expr) -> int:
- # self.attrs is always the full visible attr frame, so the index is
- # the direct pick offset with no further adjustment.
- try:
- return self.attrs.index(name)
- except ValueError:
- raise CompilerError(f"unknown attribute: {name}", node)
-
- def _stack_index(self, name: ast.Name) -> Optional[int]:
- # Offset past all attrs and any in-flight temporaries.
- idx = self._local_index(name)
- if idx is None:
- return None
- return len(self.attrs) + idx + self.num_temps
-
- def _local_index(self, name: ast.Name) -> Optional[int]:
- try:
- return self.locals.index(name.id)
- except ValueError:
- return None
-
- def _output(self, x: Any) -> None:
- print(x, file=self.buffer)
-
-
-if __name__ == "__main__":
- source_file = sys.argv[1]
- compiler = Compiler()
- try:
- output = compiler.compile(source_file)
- print(output)
- except CompilerError as e:
- print(f"{source_file}:{e.lineno}: {e}", file=sys.stderr)
>From eed1a75aaf3f3ea5482c4e7e744717c54f42f06b Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Fri, 6 Mar 2026 13:53:15 -0800
Subject: [PATCH 8/8] fix formatting
---
lldb/examples/python/formatter_bytecode.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index a188ee1819d24..a157dc49a6b7e 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -661,6 +661,7 @@ def next_byte():
assert False
return data[-1]
+
################################################################################
# Python Compiler
################################################################################
More information about the lldb-commits
mailing list