[Lldb-commits] [lldb] [lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples (PR #113398)
David Spickett via lldb-commits
lldb-commits at lists.llvm.org
Thu Oct 24 09:59:40 PDT 2024
================
@@ -0,0 +1,486 @@
+"""
+Specification, compiler, disassembler, and interpreter
+for LLDB dataformatter bytecode.
+
+See formatter-bytecode.md for more details.
+"""
+from __future__ import annotations
+
+# Types
+type_String = 1
+type_Int = 2
+type_UInt = 3
+type_Object = 4
+type_Type = 5
+
+# Opcodes
+opcode = dict()
+
+
+def define_opcode(n, mnemonic, name):
+ globals()["op_" + name] = n
+ if mnemonic:
+ opcode[mnemonic] = n
+ opcode[n] = mnemonic
+
+
+define_opcode(1, "dup", "dup")
+define_opcode(2, "drop", "drop")
+define_opcode(3, "pick", "pick")
+define_opcode(4, "over", "over")
+define_opcode(5, "swap", "swap")
+define_opcode(6, "rot", "rot")
+
+define_opcode(0x10, "{", "begin")
+define_opcode(0x11, "if", "if")
+define_opcode(0x12, "ifelse", "ifelse")
+
+define_opcode(0x20, None, "lit_uint")
+define_opcode(0x21, None, "lit_int")
+define_opcode(0x22, None, "lit_string")
+define_opcode(0x23, None, "lit_selector")
+
+define_opcode(0x30, "+", "plus")
+define_opcode(0x31, "-", "minus")
+define_opcode(0x32, "*", "mul")
+define_opcode(0x33, "/", "div")
+define_opcode(0x34, "%", "mod")
+define_opcode(0x35, "<<", "shl")
+define_opcode(0x36, ">>", "shr")
+define_opcode(0x37, "shra", "shra")
+
+define_opcode(0x40, "&", "and")
+define_opcode(0x41, "|", "or")
+define_opcode(0x42, "^", "xor")
+define_opcode(0x43, "~", "not")
+
+define_opcode(0x50, "=", "eq")
+define_opcode(0x51, "!=", "neq")
+define_opcode(0x52, "<", "lt")
+define_opcode(0x53, ">", "gt")
+define_opcode(0x54, "=<", "le")
+define_opcode(0x55, ">=", "ge")
+
+define_opcode(0x60, "call", "call")
+
+# Function signatures
+sig_summary = 0
+sig_init = 1
+sig_get_num_children = 2
+sig_get_child_index = 3
+sig_get_child_at_index = 4
+
+# Selectors
+selector = dict()
+
+
+def define_selector(n, name):
+ globals()["sel_" + name] = n
+ selector["@" + name] = n
+ selector[n] = "@" + name
+
+
+define_selector(0, "summary")
+define_selector(1, "type_summary")
+
+define_selector(0x10, "get_num_children")
+define_selector(0x11, "get_child_at_index")
+define_selector(0x12, "get_child_with_name")
+define_selector(0x13, "get_child_index")
+define_selector(0x15, "get_type")
+define_selector(0x16, "get_template_argument_type")
+define_selector(0x20, "get_value")
+define_selector(0x21, "get_value_as_unsigned")
+define_selector(0x22, "get_value_as_signed")
+define_selector(0x23, "get_value_as_address")
+define_selector(0x24, "cast")
+
+define_selector(0x40, "read_memory_byte")
+define_selector(0x41, "read_memory_uint32")
+define_selector(0x42, "read_memory_int32")
+define_selector(0x43, "read_memory_unsigned")
+define_selector(0x44, "read_memory_signed")
+define_selector(0x45, "read_memory_address")
+define_selector(0x46, "read_memory")
+
+define_selector(0x50, "fmt")
+define_selector(0x51, "sprintf")
+define_selector(0x52, "strlen")
+
+
+################################################################################
+# Compiler.
+################################################################################
+
+
+def compile(assembler: str) -> bytearray:
+ """Compile assembler into bytecode"""
+ # This is a stack of all in-flight/unterminated blocks.
+ bytecode = [bytearray()]
+
+ def emit(byte):
+ bytecode[-1].append(byte)
+
+ tokens = list(assembler.split(" "))
+ tokens.reverse()
+ while tokens:
+ tok = tokens.pop()
+ if tok == "":
+ pass
+ elif tok == "{":
+ bytecode.append(bytearray())
+ elif tok == "}":
+ block = bytecode.pop()
+ emit(op_begin)
+ emit(len(block)) # FIXME: uleb
+ bytecode[-1].extend(block)
+ elif tok[0].isdigit():
+ if tok[-1] == "u":
+ emit(op_lit_uint)
+ emit(int(tok[:-1])) # FIXME
+ else:
+ emit(op_lit_int)
+ emit(int(tok)) # FIXME
+ elif tok[0] == "@":
+ emit(op_lit_selector)
+ emit(selector[tok])
+ elif tok[0] == '"':
+ s = bytearray()
+ done = False
+ chrs = tok[1:]
+ while not done:
+ quoted = False
+ for c in chrs:
+ if quoted:
+ s.append(ord(c)) # FIXME
+ quoted = False
+ elif c == "\\":
+ quoted = True
+ elif c == '"':
+ done = True
+ break
+ # FIXME assert this is last in token
+ else:
+ s.append(ord(c))
+ if not done:
+ s.append(ord(" "))
+ chrs = tokens.pop()
+
+ emit(op_lit_string)
+ emit(len(s))
+ bytecode[-1].extend(s)
+ else:
+ emit(opcode[tok])
+ assert len(bytecode) == 1 # unterminated {
+ return bytecode[0]
+
+
+################################################################################
+# Disassembler.
+################################################################################
+
+
+def disassemble(bytecode: bytearray) -> (str, int):
+ """Disassemble bytecode into (assembler, token starts)"""
+ asm = ""
+ all_bytes = list(bytecode)
+ all_bytes.reverse()
+ blocks = []
+ tokens = [0]
+
+ def next_byte():
+ """Fetch the next byte in the bytecode and keep track of all
+ in-flight blocks"""
+ for i in range(len(blocks)):
+ blocks[i] -= 1
+ tokens.append(len(asm))
+ return all_bytes.pop()
+
+ while all_bytes:
+ b = next_byte()
+ if b == op_begin:
+ asm += "{"
+ length = next_byte()
+ blocks.append(length)
+ elif b == op_lit_uint:
+ b = next_byte()
+ asm += str(b) # FIXME uleb
+ asm += "u"
+ elif b == op_lit_int:
+ b = next_byte()
+ asm += str(b)
+ elif b == op_lit_selector:
+ b = next_byte()
+ asm += selector[b]
+ elif b == op_lit_string:
+ length = next_byte()
+ s = "'"
+ while length:
+ s += chr(next_byte())
+ length -= 1
+ asm += '"' + repr(s)[2:]
+ else:
+ asm += opcode[b]
+
+ while blocks and blocks[-1] == 0:
+ asm += " }"
+ blocks.pop()
+
+ if all_bytes:
+ asm += " "
+
+ if blocks:
+ asm += "ERROR"
+ return asm, tokens
+
+
+################################################################################
+# Interpreter.
+################################################################################
+
+
+def count_fmt_params(fmt: str) -> int:
+ """Count the number of parameters in a format string"""
+ from string import Formatter
+
+ f = Formatter()
+ n = 0
+ for _, name, _, _ in f.parse(fmt):
----------------
DavidSpickett wrote:
I didn't know you could parse a format string like this, very cool.
https://github.com/llvm/llvm-project/pull/113398
More information about the lldb-commits
mailing list