[Lldb-commits] [lldb] [lldb] Add C source output mode to formatter_bytecode.py (PR #184242)
Dave Lee via lldb-commits
lldb-commits at lists.llvm.org
Tue Mar 3 14:55:45 PST 2026
https://github.com/kastiglione updated https://github.com/llvm/llvm-project/pull/184242
>From 167abfa39cb5ed9bb6805b6890cd00c9eea2e2ee Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Mon, 2 Mar 2026 14:03:53 -0800
Subject: [PATCH 1/2] [lldb] Add C source output mode to formatter_bytecode.py
---
lldb/examples/python/formatter_bytecode.py | 138 +++++++++++++++++++--
1 file changed, 126 insertions(+), 12 deletions(-)
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index 8f2f09834ff4f..8386785b80d84 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -17,8 +17,9 @@
import re
import io
+import textwrap
from dataclasses import dataclass
-from typing import BinaryIO, TextIO, Tuple, Union
+from typing import Any, BinaryIO, TextIO, Tuple, Union
BINARY_VERSION = 1
@@ -207,9 +208,7 @@ def validate(self):
raise ValueError(f"duplicate signature: {sig}")
seen.add(sig)
- def write_binary(self, output: BinaryIO) -> None:
- self.validate()
-
+ def _to_binary(self) -> bytes:
bin = bytearray()
bin.extend(_to_uleb(len(self.type_name)))
bin.extend(bytes(self.type_name, encoding="utf-8"))
@@ -219,9 +218,77 @@ def write_binary(self, output: BinaryIO) -> None:
bin.extend(_to_uleb(len(bc)))
bin.extend(bc)
+ return bytes(bin)
+
+ def write_binary(self, output: BinaryIO) -> None:
+ self.validate()
+
+ bin = self._to_binary()
output.write(_to_byte(BINARY_VERSION))
output.write(_to_uleb(len(bin)))
- output.write(bin)
+ output.write(self._to_binary())
+
+ class _CBuilder:
+ """Helper class for emitting binary data as a C-string literal."""
+
+ entries: list[Tuple[Any, str]]
+
+ def __init__(self) -> None:
+ self.entries = []
+
+ def add_byte(self, x: int, comment: str) -> None:
+ self.add_bytes(_to_byte(x), comment)
+
+ def add_uleb(self, x: int, comment: str) -> None:
+ self.add_bytes(_to_uleb(x), comment)
+
+ def add_bytes(self, x: bytes, comment: str) -> None:
+ # Construct zero padded hex values with length two.
+ string = "".join(f"\\x{b:02x}" for b in x)
+ self.add_string(string, comment)
+
+ def add_string(self, string: str, comment: str) -> None:
+ self.entries.append((f'"{string}"', comment))
+
+ def write_source(self, output: TextIO) -> None:
+ self.validate()
+
+ size = len(self._to_binary())
+
+ b = self._CBuilder()
+ b.add_byte(BINARY_VERSION, "version")
+ b.add_uleb(size, "remaining record size")
+ b.add_uleb(len(self.type_name), "type name size")
+ b.add_string(self.type_name, "type name")
+ b.add_byte(self.flags, "flags")
+ for sig, bc in self.signatures:
+ b.add_byte(SIGNATURES[sig], f"sig_{sig}")
+ b.add_uleb(len(bc), "program size")
+ b.add_bytes(bc, "program")
+
+ print(
+ textwrap.dedent(
+ """
+ #ifdef __APPLE__
+ #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
+ #else
+ #define FORMATTER_SECTION ".lldbformatters"
+ #endif
+ """
+ ),
+ file=output,
+ )
+ var_name = re.sub(r"\W", "_", self.type_name)
+ print(
+ "__attribute__((used, section(FORMATTER_SECTION)))",
+ file=output,
+ )
+ print(f"unsigned char _{var_name}_synthetic[] =", file=output)
+ indent = " "
+ for string, comment in b.entries:
+ print(f"{indent}// {comment}", file=output)
+ print(f"{indent}{string}", file=output)
+ print(";", file=output)
def compile_file(type_name: str, input: TextIO) -> BytecodeSection:
@@ -601,7 +668,7 @@ def next_byte():
################################################################################
-def _to_uleb(value: int) -> bytearray:
+def _to_uleb(value: int) -> bytes:
"""Encode an integer to ULEB128 bytes."""
if value < 0:
raise ValueError(f"negative number cannot be encoded to ULEB128: {value}")
@@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray:
if value == 0:
break
- return result
+ return bytes(result)
def _from_uleb(stream: BinaryIO) -> int:
@@ -665,18 +732,27 @@ def _main():
"--output",
help="output file (required for --compile)",
)
+ parser.add_argument(
+ "-f",
+ "--format",
+ choices=("binary", "c"),
+ default="binary",
+ help="output file format",
+ )
parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
args = parser.parse_args()
if args.compile:
if not args.output:
parser.error("--output is required with --compile")
- with (
- open(args.input) as input,
- open(args.output, "wb") as output,
- ):
+ with open(args.input) as input:
section = compile_file(args.type_name, input)
- section.write_binary(output)
+ if args.format == "binary":
+ with open(args.output, "wb") as output:
+ section.write_binary(output)
+ else: # args.format == "c"
+ with open(args.output, "w") as output:
+ section.write_source(output)
elif args.disassemble:
if args.output:
with (
@@ -758,4 +834,42 @@ def run_disassemble(binary):
with self.assertRaises(ValueError):
run_compile("MyType", "@summary: 1u return\n at summary: 2u return")
+ def test_write_source(self):
+ # Use the Account example from main.cpp as a reference, whose
+ # exact byte values are known.
+ section = BytecodeSection(
+ type_name="Account",
+ flags=0,
+ signatures=[
+ ("get_num_children", bytes([0x20, 0x01])),
+ ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])),
+ ],
+ )
+ out = io.StringIO()
+ section.write_source(out)
+ src = out.getvalue()
+
+ self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src)
+ self.assertIn("unsigned char _Account_synthetic[] =", src)
+ self.assertIn('"\\x01"', src) # version
+ self.assertIn('"\\x15"', src) # record size (21)
+ self.assertIn('"\\x07"', src) # type name size (7)
+ self.assertIn('"Account"', src) # type name
+ self.assertIn('"\\x00"', src) # flags
+ self.assertIn('"\\x02"', src) # sig_get_num_children
+ self.assertIn('"\\x20\\x01"', src) # program
+ self.assertIn('"\\x04"', src) # sig_get_child_at_index
+ self.assertIn('"\\x06"', src) # program size
+ self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src) # program
+ self.assertIn("// version", src)
+ self.assertIn("// type name", src)
+ self.assertIn("// program", src)
+ # Semicolon terminates the array initializer.
+ self.assertEqual(src.count(";"), 1)
+
+ # Non-identifier characters in the type name are replaced with '_'.
+ out2 = io.StringIO()
+ BytecodeSection("std::vector<int>", 0, []).write_source(out2)
+ self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue())
+
unittest.main(argv=[__file__])
>From 5b82525e489ed6a69a795e87206e46b7d78913a2 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com at gmail.com>
Date: Tue, 3 Mar 2026 10:49:12 -0800
Subject: [PATCH 2/2] Declare more precise type for _CBuilder.entries
---
lldb/examples/python/formatter_bytecode.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index 8386785b80d84..1ad10d60fb974 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -19,7 +19,7 @@
import io
import textwrap
from dataclasses import dataclass
-from typing import Any, BinaryIO, TextIO, Tuple, Union
+from typing import BinaryIO, TextIO, Tuple, Union
BINARY_VERSION = 1
@@ -231,7 +231,7 @@ def write_binary(self, output: BinaryIO) -> None:
class _CBuilder:
"""Helper class for emitting binary data as a C-string literal."""
- entries: list[Tuple[Any, str]]
+ entries: list[Tuple[str, str]]
def __init__(self) -> None:
self.entries = []
More information about the lldb-commits
mailing list