[Lldb-commits] [lldb] [lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples (PR #113398)
via lldb-commits
lldb-commits at lists.llvm.org
Tue Oct 22 16:44:06 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {darker}-->
:warning: The Python code formatter, darker, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
darker --check --diff -r 2e0506f83bfde6db93454bdf28e4a71c160d4f5b...642525847da7d874a127f94f155fd738e3d78196 lldb/examples/formatter-bytecode/compiler.py lldb/examples/formatter-bytecode/test/formatter.py
``````````
</details>
<details>
<summary>
View the diff from darker here.
</summary>
``````````diff
--- compiler.py 2024-10-22 23:29:50.000000 +0000
+++ compiler.py 2024-10-22 23:43:37.694386 +0000
@@ -13,171 +13,183 @@
type_Object = 4
type_Type = 5
# Opcodes
opcode = dict()
+
+
def define_opcode(n, mnemonic, name):
- globals()['op_'+name] = n
+ globals()["op_" + name] = n
if mnemonic:
opcode[mnemonic] = n
opcode[n] = mnemonic
-define_opcode(1, 'dup', 'dup')
-define_opcode(2, 'drop', 'drop')
-define_opcode(3, 'pick', 'pick')
-define_opcode(4, 'over', 'over')
-define_opcode(5, 'swap', 'swap')
-define_opcode(6, 'rot', 'rot')
-
-define_opcode(0x10, '{', 'begin')
-define_opcode(0x11, 'if', 'if')
-define_opcode(0x12, 'ifelse', 'ifelse')
-
-define_opcode(0x20, None, 'lit_uint')
-define_opcode(0x21, None, 'lit_int')
-define_opcode(0x22, None, 'lit_string')
-define_opcode(0x23, None, 'lit_selector')
-
-define_opcode(0x30, '+', 'plus')
-define_opcode(0x31, '-', 'minus')
-define_opcode(0x32, '*', 'mul')
-define_opcode(0x33, '/', 'div')
-define_opcode(0x34, '%', 'mod')
-define_opcode(0x35, '<<', 'shl')
-define_opcode(0x36, '>>', 'shr')
-define_opcode(0x37, 'shra', 'shra')
-
-define_opcode(0x40, '&', 'and')
-define_opcode(0x41, '|', 'or')
-define_opcode(0x42, '^', 'xor')
-define_opcode(0x43, '~', 'not')
-
-define_opcode(0x50, '=', 'eq')
-define_opcode(0x51, '!=', 'neq')
-define_opcode(0x52, '<', 'lt')
-define_opcode(0x53, '>', 'gt')
-define_opcode(0x54, '=<', 'le')
-define_opcode(0x55, '>=', 'ge')
-
-define_opcode(0x60, 'call', 'call')
+
+define_opcode(1, "dup", "dup")
+define_opcode(2, "drop", "drop")
+define_opcode(3, "pick", "pick")
+define_opcode(4, "over", "over")
+define_opcode(5, "swap", "swap")
+define_opcode(6, "rot", "rot")
+
+define_opcode(0x10, "{", "begin")
+define_opcode(0x11, "if", "if")
+define_opcode(0x12, "ifelse", "ifelse")
+
+define_opcode(0x20, None, "lit_uint")
+define_opcode(0x21, None, "lit_int")
+define_opcode(0x22, None, "lit_string")
+define_opcode(0x23, None, "lit_selector")
+
+define_opcode(0x30, "+", "plus")
+define_opcode(0x31, "-", "minus")
+define_opcode(0x32, "*", "mul")
+define_opcode(0x33, "/", "div")
+define_opcode(0x34, "%", "mod")
+define_opcode(0x35, "<<", "shl")
+define_opcode(0x36, ">>", "shr")
+define_opcode(0x37, "shra", "shra")
+
+define_opcode(0x40, "&", "and")
+define_opcode(0x41, "|", "or")
+define_opcode(0x42, "^", "xor")
+define_opcode(0x43, "~", "not")
+
+define_opcode(0x50, "=", "eq")
+define_opcode(0x51, "!=", "neq")
+define_opcode(0x52, "<", "lt")
+define_opcode(0x53, ">", "gt")
+define_opcode(0x54, "=<", "le")
+define_opcode(0x55, ">=", "ge")
+
+define_opcode(0x60, "call", "call")
# Function signatures
sig_summary = 0
sig_init = 1
sig_get_num_children = 2
sig_get_child_index = 3
sig_get_child_at_index = 4
# Selectors
selector = dict()
+
+
def define_selector(n, name):
- globals()['sel_'+name] = n
- selector['@'+name] = n
- selector[n] = '@'+name
-
-define_selector(0, 'summary')
-define_selector(1, 'type_summary')
-
-define_selector(0x10, 'get_num_children')
-define_selector(0x11, 'get_child_at_index')
-define_selector(0x12, 'get_child_with_name')
-define_selector(0x13, 'get_child_index')
-define_selector(0x15, 'get_type')
-define_selector(0x16, 'get_template_argument_type')
-define_selector(0x20, 'get_value')
-define_selector(0x21, 'get_value_as_unsigned')
-define_selector(0x22, 'get_value_as_signed')
-define_selector(0x23, 'get_value_as_address')
-define_selector(0x24, 'cast')
-
-define_selector(0x40, 'read_memory_byte')
-define_selector(0x41, 'read_memory_uint32')
-define_selector(0x42, 'read_memory_int32')
-define_selector(0x43, 'read_memory_unsigned')
-define_selector(0x44, 'read_memory_signed')
-define_selector(0x45, 'read_memory_address')
-define_selector(0x46, 'read_memory')
-
-define_selector(0x50, 'fmt')
-define_selector(0x51, 'sprintf')
-define_selector(0x52, 'strlen')
+ globals()["sel_" + name] = n
+ selector["@" + name] = n
+ selector[n] = "@" + name
+
+
+define_selector(0, "summary")
+define_selector(1, "type_summary")
+
+define_selector(0x10, "get_num_children")
+define_selector(0x11, "get_child_at_index")
+define_selector(0x12, "get_child_with_name")
+define_selector(0x13, "get_child_index")
+define_selector(0x15, "get_type")
+define_selector(0x16, "get_template_argument_type")
+define_selector(0x20, "get_value")
+define_selector(0x21, "get_value_as_unsigned")
+define_selector(0x22, "get_value_as_signed")
+define_selector(0x23, "get_value_as_address")
+define_selector(0x24, "cast")
+
+define_selector(0x40, "read_memory_byte")
+define_selector(0x41, "read_memory_uint32")
+define_selector(0x42, "read_memory_int32")
+define_selector(0x43, "read_memory_unsigned")
+define_selector(0x44, "read_memory_signed")
+define_selector(0x45, "read_memory_address")
+define_selector(0x46, "read_memory")
+
+define_selector(0x50, "fmt")
+define_selector(0x51, "sprintf")
+define_selector(0x52, "strlen")
################################################################################
# Compiler.
################################################################################
+
def compile(assembler: str) -> bytearray:
"""Compile assembler into bytecode"""
# This is a stack of all in-flight/unterminated blocks.
bytecode = [bytearray()]
def emit(byte):
bytecode[-1].append(byte)
- tokens = list(assembler.split(' '))
+ tokens = list(assembler.split(" "))
tokens.reverse()
while tokens:
tok = tokens.pop()
- if tok == '': pass
- elif tok == '{': bytecode.append(bytearray())
- elif tok == '}':
+ if tok == "":
+ pass
+ elif tok == "{":
+ bytecode.append(bytearray())
+ elif tok == "}":
block = bytecode.pop()
emit(op_begin)
- emit(len(block)) # FIXME: uleb
+ emit(len(block)) # FIXME: uleb
bytecode[-1].extend(block)
elif tok[0].isdigit():
- if tok[-1] == 'u':
+ if tok[-1] == "u":
emit(op_lit_uint)
- emit(int(tok[:-1])) # FIXME
+ emit(int(tok[:-1])) # FIXME
else:
emit(op_lit_int)
- emit(int(tok)) # FIXME
- elif tok[0] == '@':
+ emit(int(tok)) # FIXME
+ elif tok[0] == "@":
emit(op_lit_selector)
emit(selector[tok])
elif tok[0] == '"':
s = bytearray()
done = False
chrs = tok[1:]
while not done:
quoted = False
for c in chrs:
if quoted:
- s.append(ord(c)) #FIXME
+ s.append(ord(c)) # FIXME
quoted = False
- elif c == '\\':
+ elif c == "\\":
quoted = True
elif c == '"':
- done = True;
- break; # FIXME assert this is last in token
+ done = True
+ break
+ # FIXME assert this is last in token
else:
s.append(ord(c))
if not done:
- s.append(ord(' '))
+ s.append(ord(" "))
chrs = tokens.pop()
emit(op_lit_string)
emit(len(s))
bytecode[-1].extend(s)
else:
emit(opcode[tok])
- assert(len(bytecode) == 1) # unterminated {
+ assert len(bytecode) == 1 # unterminated {
return bytecode[0]
################################################################################
# Disassembler.
################################################################################
+
def disassemble(bytecode: bytearray) -> (str, int):
"""Disassemble bytecode into (assembler, token starts)"""
asm = ""
all_bytes = list(bytecode)
all_bytes.reverse()
blocks = []
tokens = [0]
+
def next_byte():
"""Fetch the next byte in the bytecode and keep track of all
in-flight blocks"""
for i in range(len(blocks)):
blocks[i] -= 1
@@ -185,17 +197,17 @@
return all_bytes.pop()
while all_bytes:
b = next_byte()
if b == op_begin:
- asm += '{'
+ asm += "{"
length = next_byte()
blocks.append(length)
elif b == op_lit_uint:
b = next_byte()
- asm += str(b) # FIXME uleb
- asm += 'u'
+ asm += str(b) # FIXME uleb
+ asm += "u"
elif b == op_lit_int:
b = next_byte()
asm += str(b)
elif b == op_lit_selector:
b = next_byte()
@@ -209,28 +221,30 @@
asm += '"' + repr(s)[2:]
else:
asm += opcode[b]
while blocks and blocks[-1] == 0:
- asm += ' }'
+ asm += " }"
blocks.pop()
if all_bytes:
- asm += ' '
+ asm += " "
if blocks:
asm += "ERROR"
return asm, tokens
################################################################################
# Interpreter.
################################################################################
+
def count_fmt_params(fmt: str) -> int:
"""Count the number of parameters in a format string"""
from string import Formatter
+
f = Formatter()
n = 0
for _, name, _, _ in f.parse(fmt):
if name > n:
n = name
@@ -244,25 +258,30 @@
def trace():
"""print a trace of the execution for debugging purposes"""
def fmt(d):
- if isinstance(d, int): return str(d)
- if isinstance(d, str): return d
+ if isinstance(d, int):
+ return str(d)
+ if isinstance(d, str):
+ return d
return repr(type(d))
pc, end = frame[-1]
asm, tokens = disassemble(bytecode)
- print('=== frame = {1}, data = {2}, opcode = {0}'
- .format(opcode[b], frame, [fmt(d) for d in data]))
+ print(
+ "=== frame = {1}, data = {2}, opcode = {0}".format(
+ opcode[b], frame, [fmt(d) for d in data]
+ )
+ )
print(asm)
- print(' '*(tokens[pc]) + '^')
+ print(" " * (tokens[pc]) + "^")
def next_byte():
"""Fetch the next byte and update the PC"""
pc, end = frame[-1]
- assert(pc < len(bytecode))
+ assert pc < len(bytecode)
b = bytecode[pc]
frame[-1] = pc + 1, end
# At the end of a block?
while pc >= end:
frame.pop()
@@ -280,14 +299,18 @@
if b == None:
break
if tracing:
trace()
# Data stack manipulation.
- if b == op_dup: data.append(data[-1])
- elif b == op_drop: data.pop()
- elif b == op_pick: data.append(data[data.pop()])
- elif b == op_over: data.append(data[-2])
+ if b == op_dup:
+ data.append(data[-1])
+ elif b == op_drop:
+ data.pop()
+ elif b == op_pick:
+ data.append(data[data.pop()])
+ elif b == op_over:
+ data.append(data[-2])
elif b == op_swap:
x = data.pop()
y = data.pop()
data.append(x)
data.append(y)
@@ -301,11 +324,11 @@
# Control stack manipulation.
elif b == op_begin:
length = next_byte()
pc, end = frame[-1]
- control.append((pc, pc+length))
+ control.append((pc, pc + length))
frame[-1] = pc + length, end
elif b == op_if:
if data.pop():
frame.append(control.pop())
elif b == op_ifelse:
@@ -333,28 +356,51 @@
s += chr(next_byte())
length -= 1
data.append(s)
# Arithmetic, logic, etc.
- elif b == op_plus: data.append(data.pop() + data.pop())
- elif b == op_minus: data.append(- data.pop() + data.pop())
- elif b == op_mul: data.append(data.pop() * data.pop())
- elif b == op_div: y = data.pop(); data.append(data.pop() / y)
- elif b == op_mod: y = data.pop(); data.append(data.pop() % y)
- elif b == op_shl: y = data.pop(); data.append(data.pop() << y)
- elif b == op_shr: y = data.pop(); data.append(data.pop() >> y)
- elif b == op_shra: y = data.pop(); data.append(data.pop() >> y) # FIXME
- elif b == op_and: data.append(data.pop() & data.pop())
- elif b == op_or: data.append(data.pop() | data.pop())
- elif b == op_xor: data.append(data.pop() ^ data.pop())
- elif b == op_not: data.append(not data.pop())
- elif b == op_eq: data.append(data.pop() == data.pop())
- elif b == op_neq: data.append(data.pop() != data.pop())
- elif b == op_lt: data.append(data.pop() > data.pop())
- elif b == op_gt: data.append(data.pop() < data.pop())
- elif b == op_le: data.append(data.pop() >= data.pop())
- elif b == op_ge: data.append(data.pop() <= data.pop())
+ elif b == op_plus:
+ data.append(data.pop() + data.pop())
+ elif b == op_minus:
+ data.append(-data.pop() + data.pop())
+ elif b == op_mul:
+ data.append(data.pop() * data.pop())
+ elif b == op_div:
+ y = data.pop()
+ data.append(data.pop() / y)
+ elif b == op_mod:
+ y = data.pop()
+ data.append(data.pop() % y)
+ elif b == op_shl:
+ y = data.pop()
+ data.append(data.pop() << y)
+ elif b == op_shr:
+ y = data.pop()
+ data.append(data.pop() >> y)
+ elif b == op_shra:
+ y = data.pop()
+ data.append(data.pop() >> y) # FIXME
+ elif b == op_and:
+ data.append(data.pop() & data.pop())
+ elif b == op_or:
+ data.append(data.pop() | data.pop())
+ elif b == op_xor:
+ data.append(data.pop() ^ data.pop())
+ elif b == op_not:
+ data.append(not data.pop())
+ elif b == op_eq:
+ data.append(data.pop() == data.pop())
+ elif b == op_neq:
+ data.append(data.pop() != data.pop())
+ elif b == op_lt:
+ data.append(data.pop() > data.pop())
+ elif b == op_gt:
+ data.append(data.pop() < data.pop())
+ elif b == op_le:
+ data.append(data.pop() >= data.pop())
+ elif b == op_ge:
+ data.append(data.pop() <= data.pop())
# Function calls.
elif b == op_call:
sel = data.pop()
if sel == sel_summary:
@@ -400,38 +446,41 @@
for i in range(n):
args.append(data.pop())
data.append(fmt.format(*args))
else:
print("not implemented: " + selector[sel])
- assert(False)
+ assert False
pass
return data[-1]
################################################################################
# Tests.
################################################################################
import unittest
+
class TestCompiler(unittest.TestCase):
-
def test(self):
- self.assertEqual(compile("1u dup").hex(), '200101')
- self.assertEqual(compile("\"1u dup\"").hex(), '2206317520647570')
- self.assertEqual(compile("16 < { dup } if").hex(), '21105210010111')
- self.assertEqual(compile("{ { \" } \" } }").hex(), '100710052203207d20')
+ self.assertEqual(compile("1u dup").hex(), "200101")
+ self.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
+ self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
+ self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")
def roundtrip(asm):
self.assertEqual(disassemble(compile(asm))[0], asm)
roundtrip("1u dup")
- roundtrip("1u dup \"1u dup\"")
+ roundtrip('1u dup "1u dup"')
roundtrip("16 < { dup } if")
- roundtrip("{ { \" } \" } }")
+ roundtrip('{ { " } " } }')
self.assertEqual(interpret(compile("1 1 +"), [], []), 2)
self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
- self.assertEqual(interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes")
-
-if __name__ == '__main__':
+ self.assertEqual(
+ interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
+ )
+
+
+if __name__ == "__main__":
unittest.main()
--- test/formatter.py 2024-10-22 23:29:50.000000 +0000
+++ test/formatter.py 2024-10-22 23:43:37.737784 +0000
@@ -3,10 +3,11 @@
with the implementation replaced by bytecode.
"""
from __future__ import annotations
from compiler import *
import lldb
+
def __lldb_init_module(debugger, internal_dict):
debugger.HandleCommand(
"type synthetic add -w llvm "
f"-l {__name__}.MyOptionalSynthProvider "
@@ -16,22 +17,27 @@
"type summary add -w llvm "
f"-e -F {__name__}.MyOptionalSummaryProvider "
'-x "^MyOptional<.+>$"'
)
-def evaluate(assembler : str, data : list):
+
+def evaluate(assembler: str, data: list):
bytecode = compile(assembler)
trace = True
if trace:
- print("Compiled to {0} bytes of bytecode:\n0x{1}"
- .format(len(bytecode), bytecode.hex()))
- result = interpret(bytecode, [], data, False) #trace)
+ print(
+ "Compiled to {0} bytes of bytecode:\n0x{1}".format(
+ len(bytecode), bytecode.hex()
+ )
+ )
+ result = interpret(bytecode, [], data, False) # trace)
if trace:
print("--> {0}".format(result))
return result
-#def GetOptionalValue(valobj):
+
+# def GetOptionalValue(valobj):
# storage = valobj.GetChildMemberWithName("Storage")
# if not storage:
# storage = valobj
#
# failure = 2
@@ -46,75 +52,80 @@
# storage = storage.GetChildMemberWithName("value")
# return storage.Cast(underlying_type)
def MyOptionalSummaryProvider(valobj, internal_dict):
-# val = GetOptionalValue(valobj)
-# if val is None:
-# return "None"
-# if val.summary:
-# return val.summary
-# return val.GetValue()
+ # val = GetOptionalValue(valobj)
+ # if val is None:
+ # return "None"
+ # if val.summary:
+ # return val.summary
+ # return val.GetValue()
summary = ""
- summary += ' dup "Storage" @get_child_with_name call' # valobj storage
- summary += ' dup { swap } if drop' # storage
- summary += ' dup "hasVal" @get_child_with_name call' # storage
- summary += ' @get_value_as_unsigned call' # storage int(hasVal)
+ summary += ' dup "Storage" @get_child_with_name call' # valobj storage
+ summary += " dup { swap } if drop" # storage
+ summary += ' dup "hasVal" @get_child_with_name call' # storage
+ summary += " @get_value_as_unsigned call" # storage int(hasVal)
summary += ' dup 2 = { drop "<could not read MyOptional>" } {'
summary += ' 0 = { "None" } {'
- summary += ' dup @get_type call 0 @get_template_argument_type call' # storage type
- summary += ' swap' # type storage
- summary += ' "value" @get_child_with_name call' # type value
- summary += ' swap @cast call' # type(value)
+ summary += (
+ " dup @get_type call 0 @get_template_argument_type call" # storage type
+ )
+ summary += " swap" # type storage
+ summary += ' "value" @get_child_with_name call' # type value
+ summary += " swap @cast call" # type(value)
summary += ' dup 0 = { "None" } {'
- summary += ' dup @summary call { @summary call } { @get_value call } ifelse'
- summary += ' } ifelse'
- summary += ' } ifelse'
- summary += ' } ifelse'
+ summary += " dup @summary call { @summary call } { @get_value call } ifelse"
+ summary += " } ifelse"
+ summary += " } ifelse"
+ summary += " } ifelse"
return evaluate(summary, [valobj])
+
class MyOptionalSynthProvider:
"""Provides deref support to llvm::Optional<T>"""
def __init__(self, valobj, internal_dict):
self.valobj = valobj
def num_children(self):
- #return self.valobj.num_children
- num_children = ' @get_num_children call'
+ # return self.valobj.num_children
+ num_children = " @get_num_children call"
return evaluate(num_children, [self.valobj])
def get_child_index(self, name):
- #if name == "$$dereference$$":
+ # if name == "$$dereference$$":
# return self.valobj.num_children
- #return self.valobj.GetIndexOfChildWithName(name)
+ # return self.valobj.GetIndexOfChildWithName(name)
get_child_index = ' dup "$$dereference$$" ='
- get_child_index += ' { drop @get_num_children call } {' # obj name
- get_child_index += ' @get_child_index call' # index
- get_child_index += ' } ifelse'
+ get_child_index += " { drop @get_num_children call } {" # obj name
+ get_child_index += " @get_child_index call" # index
+ get_child_index += " } ifelse"
return evaluate(get_child_index, [self.valobj, name])
def get_child_at_index(self, index):
- #if index < self.valobj.num_children:
+ # if index < self.valobj.num_children:
# return self.valobj.GetChildAtIndex(index)
- #return GetOptionalValue(self.valobj) or lldb.SBValue()
- get_child_at_index = ' over over swap' # obj index index obj
- get_child_at_index += ' @get_num_children call' # obj index index n
- get_child_at_index += ' < { @get_child_at_index call } {' # obj index
+ # return GetOptionalValue(self.valobj) or lldb.SBValue()
+ get_child_at_index = " over over swap" # obj index index obj
+ get_child_at_index += " @get_num_children call" # obj index index n
+ get_child_at_index += " < { @get_child_at_index call } {" # obj index
- get_opt_val = ' dup "Storage" @get_child_with_name call' # valobj storage
- get_opt_val += ' dup { swap } if drop' # storage
- get_opt_val += ' dup "hasVal" @get_child_with_name call' # storage
- get_opt_val += ' @get_value_as_unsigned call' # storage int(hasVal)
+ get_opt_val = ' dup "Storage" @get_child_with_name call' # valobj storage
+ get_opt_val += " dup { swap } if drop" # storage
+ get_opt_val += ' dup "hasVal" @get_child_with_name call' # storage
+ get_opt_val += " @get_value_as_unsigned call" # storage int(hasVal)
get_opt_val += ' dup 2 = { drop "<could not read MyOptional>" } {'
get_opt_val += ' 0 = { "None" } {'
- get_opt_val += ' dup @get_type call 0 @get_template_argument_type call' # storage type
- get_opt_val += ' swap' # type storage
- get_opt_val += ' "value" @get_child_with_name call' # type value
- get_opt_val += ' swap @cast call' # type(value)
- get_opt_val += ' } ifelse'
- get_opt_val += ' } ifelse'
+ get_opt_val += (
+ " dup @get_type call 0 @get_template_argument_type call" # storage type
+ )
+ get_opt_val += " swap" # type storage
+ get_opt_val += ' "value" @get_child_with_name call' # type value
+ get_opt_val += " swap @cast call" # type(value)
+ get_opt_val += " } ifelse"
+ get_opt_val += " } ifelse"
get_child_at_index += get_opt_val
- get_child_at_index += ' } ifelse'
+ get_child_at_index += " } ifelse"
return evaluate(get_child_at_index, [self.valobj, index])
``````````
</details>
https://github.com/llvm/llvm-project/pull/113398
More information about the lldb-commits mailing list