[Lldb-commits] [lldb] ece4b75 - [lldb] Add C source output mode to formatter_bytecode.py (#184242)
via lldb-commits
lldb-commits at lists.llvm.org
Tue Mar 3 15:48:35 PST 2026
Author: Dave Lee
Date: 2026-03-03T15:48:30-08:00
New Revision: ece4b759327c03fa09edc90b802db3a74fa19e33
URL: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33
DIFF: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33.diff
LOG: [lldb] Add C source output mode to formatter_bytecode.py (#184242)
Add the ability to generate a C source file, in addition to the
existing ability to generate binary output.
An example of the generated source:
```c
#ifdef __APPLE__
#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
#else
#define FORMATTER_SECTION ".lldbformatters"
#endif
__attribute__((used, section(FORMATTER_SECTION)))
unsigned char _Account_synthetic[] =
// version
"\x01"
// remaining record size
"\x15"
// type name size
"\x07"
// type name
"Account"
// flags
"\x00"
// sig_get_num_children
"\x02"
// program size
"\x02"
// program
"\x20\x01"
// sig_get_child_at_index
"\x04"
// program size
"\x06"
// program
"\x02\x20\x00\x23\x11\x60"
;
```
Added:
Modified:
lldb/examples/python/formatter_bytecode.py
Removed:
################################################################################
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index 8f2f09834ff4f..1ad10d60fb974 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -17,6 +17,7 @@
import re
import io
+import textwrap
from dataclasses import dataclass
from typing import BinaryIO, TextIO, Tuple, Union
@@ -207,9 +208,7 @@ def validate(self):
raise ValueError(f"duplicate signature: {sig}")
seen.add(sig)
- def write_binary(self, output: BinaryIO) -> None:
- self.validate()
-
+ def _to_binary(self) -> bytes:
bin = bytearray()
bin.extend(_to_uleb(len(self.type_name)))
bin.extend(bytes(self.type_name, encoding="utf-8"))
@@ -219,9 +218,77 @@ def write_binary(self, output: BinaryIO) -> None:
bin.extend(_to_uleb(len(bc)))
bin.extend(bc)
+ return bytes(bin)
+
+ def write_binary(self, output: BinaryIO) -> None:
+ self.validate()
+
+ bin = self._to_binary()
output.write(_to_byte(BINARY_VERSION))
output.write(_to_uleb(len(bin)))
- output.write(bin)
+ output.write(self._to_binary())
+
+ class _CBuilder:
+ """Helper class for emitting binary data as a C-string literal."""
+
+ entries: list[Tuple[str, str]]
+
+ def __init__(self) -> None:
+ self.entries = []
+
+ def add_byte(self, x: int, comment: str) -> None:
+ self.add_bytes(_to_byte(x), comment)
+
+ def add_uleb(self, x: int, comment: str) -> None:
+ self.add_bytes(_to_uleb(x), comment)
+
+ def add_bytes(self, x: bytes, comment: str) -> None:
+ # Construct zero padded hex values with length two.
+ string = "".join(f"\\x{b:02x}" for b in x)
+ self.add_string(string, comment)
+
+ def add_string(self, string: str, comment: str) -> None:
+ self.entries.append((f'"{string}"', comment))
+
+ def write_source(self, output: TextIO) -> None:
+ self.validate()
+
+ size = len(self._to_binary())
+
+ b = self._CBuilder()
+ b.add_byte(BINARY_VERSION, "version")
+ b.add_uleb(size, "remaining record size")
+ b.add_uleb(len(self.type_name), "type name size")
+ b.add_string(self.type_name, "type name")
+ b.add_byte(self.flags, "flags")
+ for sig, bc in self.signatures:
+ b.add_byte(SIGNATURES[sig], f"sig_{sig}")
+ b.add_uleb(len(bc), "program size")
+ b.add_bytes(bc, "program")
+
+ print(
+ textwrap.dedent(
+ """
+ #ifdef __APPLE__
+ #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
+ #else
+ #define FORMATTER_SECTION ".lldbformatters"
+ #endif
+ """
+ ),
+ file=output,
+ )
+ var_name = re.sub(r"\W", "_", self.type_name)
+ print(
+ "__attribute__((used, section(FORMATTER_SECTION)))",
+ file=output,
+ )
+ print(f"unsigned char _{var_name}_synthetic[] =", file=output)
+ indent = " "
+ for string, comment in b.entries:
+ print(f"{indent}// {comment}", file=output)
+ print(f"{indent}{string}", file=output)
+ print(";", file=output)
def compile_file(type_name: str, input: TextIO) -> BytecodeSection:
@@ -601,7 +668,7 @@ def next_byte():
################################################################################
-def _to_uleb(value: int) -> bytearray:
+def _to_uleb(value: int) -> bytes:
"""Encode an integer to ULEB128 bytes."""
if value < 0:
raise ValueError(f"negative number cannot be encoded to ULEB128: {value}")
@@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray:
if value == 0:
break
- return result
+ return bytes(result)
def _from_uleb(stream: BinaryIO) -> int:
@@ -665,18 +732,27 @@ def _main():
"--output",
help="output file (required for --compile)",
)
+ parser.add_argument(
+ "-f",
+ "--format",
+ choices=("binary", "c"),
+ default="binary",
+ help="output file format",
+ )
parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
args = parser.parse_args()
if args.compile:
if not args.output:
parser.error("--output is required with --compile")
- with (
- open(args.input) as input,
- open(args.output, "wb") as output,
- ):
+ with open(args.input) as input:
section = compile_file(args.type_name, input)
- section.write_binary(output)
+ if args.format == "binary":
+ with open(args.output, "wb") as output:
+ section.write_binary(output)
+ else: # args.format == "c"
+ with open(args.output, "w") as output:
+ section.write_source(output)
elif args.disassemble:
if args.output:
with (
@@ -758,4 +834,42 @@ def run_disassemble(binary):
with self.assertRaises(ValueError):
run_compile("MyType", "@summary: 1u return\n@summary: 2u return")
+ def test_write_source(self):
+ # Use the Account example from main.cpp as a reference, whose
+ # exact byte values are known.
+ section = BytecodeSection(
+ type_name="Account",
+ flags=0,
+ signatures=[
+ ("get_num_children", bytes([0x20, 0x01])),
+ ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])),
+ ],
+ )
+ out = io.StringIO()
+ section.write_source(out)
+ src = out.getvalue()
+
+ self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src)
+ self.assertIn("unsigned char _Account_synthetic[] =", src)
+ self.assertIn('"\\x01"', src) # version
+ self.assertIn('"\\x15"', src) # record size (21)
+ self.assertIn('"\\x07"', src) # type name size (7)
+ self.assertIn('"Account"', src) # type name
+ self.assertIn('"\\x00"', src) # flags
+ self.assertIn('"\\x02"', src) # sig_get_num_children
+ self.assertIn('"\\x20\\x01"', src) # program
+ self.assertIn('"\\x04"', src) # sig_get_child_at_index
+ self.assertIn('"\\x06"', src) # program size
+ self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src) # program
+ self.assertIn("// version", src)
+ self.assertIn("// type name", src)
+ self.assertIn("// program", src)
+ # Semicolon terminates the array initializer.
+ self.assertEqual(src.count(";"), 1)
+
+ # Non-identifier characters in the type name are replaced with '_'.
+ out2 = io.StringIO()
+ BytecodeSection("std::vector<int>", 0, []).write_source(out2)
+ self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue())
+
unittest.main(argv=[__file__])
More information about the lldb-commits
mailing list