[Lldb-commits] [lldb] [lldb] Implement a formatter bytecode interpreter in C++ (PR #114333)
via lldb-commits
lldb-commits at lists.llvm.org
Wed Oct 30 17:04:21 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: Adrian Prantl (adrian-prantl)
<details>
<summary>Changes</summary>
Compared to the Python version, this implementation also performs type
checking and error handling, so it is slightly longer; however, it is still
comfortably under 500 lines.
See https://discourse.llvm.org/t/a-bytecode-for-lldb-data-formatters/82696 for more context!
This is currently a draft; I still want to add more tests and also extend the metadata with a (show children) flag and potentially others.
---
Patch is 47.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114333.diff
19 Files Affected:
- (modified) lldb/include/lldb/DataFormatters/TypeSummary.h (+21-1)
- (modified) lldb/include/lldb/lldb-enumerations.h (+2)
- (modified) lldb/source/Core/Section.cpp (+4)
- (modified) lldb/source/DataFormatters/CMakeLists.txt (+1)
- (added) lldb/source/DataFormatters/FormatterBytecode.cpp (+576)
- (added) lldb/source/DataFormatters/FormatterBytecode.def (+101)
- (added) lldb/source/DataFormatters/FormatterBytecode.h (+63)
- (modified) lldb/source/DataFormatters/TypeSummary.cpp (+71-3)
- (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (+2)
- (modified) lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp (+8)
- (modified) lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp (+2)
- (modified) lldb/source/Symbol/ObjectFile.cpp (+1)
- (modified) lldb/source/Target/Target.cpp (+151)
- (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile (+2)
- (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py (+14)
- (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp (+36)
- (added) lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile (+2)
- (added) lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py (+12)
- (added) lldb/test/API/functionalities/data-formatter/embedded-summary/main.c (+22)
``````````diff
diff --git a/lldb/include/lldb/DataFormatters/TypeSummary.h b/lldb/include/lldb/DataFormatters/TypeSummary.h
index 382824aa2813da..0d8e46fa0b1598 100644
--- a/lldb/include/lldb/DataFormatters/TypeSummary.h
+++ b/lldb/include/lldb/DataFormatters/TypeSummary.h
@@ -22,6 +22,10 @@
#include "lldb/Utility/Status.h"
#include "lldb/Utility/StructuredData.h"
+namespace llvm {
+class MemoryBuffer;
+}
+
namespace lldb_private {
class TypeSummaryOptions {
public:
@@ -44,7 +48,7 @@ class TypeSummaryOptions {
class TypeSummaryImpl {
public:
- enum class Kind { eSummaryString, eScript, eCallback, eInternal };
+ enum class Kind { eSummaryString, eScript, eBytecode, eCallback, eInternal };
virtual ~TypeSummaryImpl() = default;
@@ -409,6 +413,22 @@ struct ScriptSummaryFormat : public TypeSummaryImpl {
ScriptSummaryFormat(const ScriptSummaryFormat &) = delete;
const ScriptSummaryFormat &operator=(const ScriptSummaryFormat &) = delete;
};
+
+/// A summary formatter that is defined in LLDB formatter bytecode.
+///
+/// The Kind::eBytecode enumerator identifies instances of this class; see
+/// classof() below.
+class BytecodeSummaryFormat : public TypeSummaryImpl {
+ /// Buffer holding the bytecode; exclusively owned by this formatter.
+ std::unique_ptr<llvm::MemoryBuffer> m_bytecode;
+public:
+ /// Construct a formatter from summary \p flags, taking ownership of
+ /// \p bytecode.
+ BytecodeSummaryFormat(const TypeSummaryImpl::Flags &flags,
+ std::unique_ptr<llvm::MemoryBuffer> bytecode);
+ /// Override of TypeSummaryImpl::FormatObject.
+ /// NOTE(review): presumably writes the summary of \p valobj into \p dest and
+ /// returns true on success -- confirm against the definition in
+ /// TypeSummary.cpp.
+ bool FormatObject(ValueObject *valobj, std::string &dest,
+ const TypeSummaryOptions &options) override;
+ /// Override of TypeSummaryImpl::GetDescription.
+ std::string GetDescription() override;
+ /// Override of TypeSummaryImpl::GetName.
+ std::string GetName() override;
+ /// LLVM-style RTTI support: true if \p S reports Kind::eBytecode.
+ static bool classof(const TypeSummaryImpl *S) {
+ return S->GetKind() == Kind::eBytecode;
+ }
+};
+
} // namespace lldb_private
#endif // LLDB_DATAFORMATTERS_TYPESUMMARY_H
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index 938f6e3abe8f2a..b2f0943d5a9260 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -761,6 +761,8 @@ enum SectionType {
eSectionTypeDWARFDebugLocListsDwo,
eSectionTypeDWARFDebugTuIndex,
eSectionTypeCTF,
+ eSectionTypeLLDBTypeSummaries,
+ eSectionTypeLLDBFormatters,
eSectionTypeSwiftModules,
};
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index 0763e88d4608f4..3b5ca2c6785ef0 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -147,6 +147,8 @@ const char *Section::GetTypeAsCString() const {
return "dwarf-gnu-debugaltlink";
case eSectionTypeCTF:
return "ctf";
+ case eSectionTypeLLDBTypeSummaries:
+ return "lldb-type-summaries";
case eSectionTypeOther:
return "regular";
case eSectionTypeSwiftModules:
@@ -457,6 +459,8 @@ bool Section::ContainsOnlyDebugInfo() const {
case eSectionTypeDWARFAppleObjC:
case eSectionTypeDWARFGNUDebugAltLink:
case eSectionTypeCTF:
+ case eSectionTypeLLDBTypeSummaries:
+ case eSectionTypeLLDBFormatters:
case eSectionTypeSwiftModules:
return true;
}
diff --git a/lldb/source/DataFormatters/CMakeLists.txt b/lldb/source/DataFormatters/CMakeLists.txt
index 7f48a2785c73f5..17da138227d4f1 100644
--- a/lldb/source/DataFormatters/CMakeLists.txt
+++ b/lldb/source/DataFormatters/CMakeLists.txt
@@ -5,6 +5,7 @@ add_lldb_library(lldbDataFormatters NO_PLUGIN_DEPENDENCIES
FormatCache.cpp
FormatClasses.cpp
FormatManager.cpp
+ FormatterBytecode.cpp
FormattersHelpers.cpp
LanguageCategory.cpp
StringPrinter.cpp
diff --git a/lldb/source/DataFormatters/FormatterBytecode.cpp b/lldb/source/DataFormatters/FormatterBytecode.cpp
new file mode 100644
index 00000000000000..7e8bfd3a370ce7
--- /dev/null
+++ b/lldb/source/DataFormatters/FormatterBytecode.cpp
@@ -0,0 +1,576 @@
+//===-- FormatterBytecode.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatterBytecode.h"
+#include "lldb/Core/ValueObject.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/Support/FormatProviders.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace lldb;
+namespace lldb_private {
+
+/// Return a printable name for opcode \p op.
+///
+/// Each DEFINE_OPCODE entry in FormatterBytecode.def becomes one case: the
+/// entry's MNEMONIC is returned when it is non-null, otherwise the stringized
+/// NAME argument. A value with no matching entry is printed as a decimal
+/// number.
+std::string toString(FormatterBytecode::OpCodes op) {
+  switch (op) {
+#define DEFINE_OPCODE(OP, MNEMONIC, NAME)                                      \
+  case OP: {                                                                   \
+    const char *s = MNEMONIC;                                                  \
+    return s ? s : #NAME;                                                      \
+  }
+#include "FormatterBytecode.def"
+// Undefine the macro that was defined above (DEFINE_OPCODE, not
+// DEFINE_SIGNATURE) so that it cannot leak into later inclusions of the .def
+// file in this translation unit.
+#undef DEFINE_OPCODE
+  }
+  return llvm::utostr(op);
+}
+
+/// Return a printable name for selector \p sel.
+///
+/// Each DEFINE_SELECTOR entry in FormatterBytecode.def becomes one case that
+/// returns the stringized NAME prefixed with '@'. A value with no matching
+/// entry is printed as '@' followed by its decimal number.
+std::string toString(FormatterBytecode::Selectors sel) {
+  switch (sel) {
+#define DEFINE_SELECTOR(ID, NAME)                                              \
+  case ID:                                                                     \
+    return "@" #NAME;
+#include "FormatterBytecode.def"
+// Undefine the macro that was defined above (DEFINE_SELECTOR, not
+// DEFINE_SIGNATURE) so that it cannot leak into later inclusions of the .def
+// file in this translation unit.
+#undef DEFINE_SELECTOR
+  }
+  return "@" + llvm::utostr(sel);
+}
+
+/// Return a printable name for signature \p sig.
+///
+/// Each DEFINE_SIGNATURE entry in FormatterBytecode.def becomes one case that
+/// returns the stringized NAME prefixed with '@'.
+/// NOTE(review): the '@' prefix mirrors the selector printer, while the
+/// numeric fallback below has no prefix -- confirm which form is intended for
+/// signatures.
+std::string toString(FormatterBytecode::Signatures sig) {
+ switch (sig) {
+#define DEFINE_SIGNATURE(ID, NAME) \
+ case ID: \
+ return "@" #NAME;
+#include "FormatterBytecode.def"
+#undef DEFINE_SIGNATURE
+ }
+ // No entry matched: print the raw value as a decimal number.
+ return llvm::utostr(sig);
+}
+
+/// Render the contents of the data stack \p data as a single line of the form
+/// "[ elem elem ... ]".
+///
+/// Strings are quoted, unsigned integers get a 'u' suffix, signed integers are
+/// printed bare, objects print as "object(<value>)" or "null", types print
+/// their name in parentheses, and selectors use their toString() form.
+std::string toString(const FormatterBytecode::DataStack &data) {
+  std::string rendered;
+  llvm::raw_string_ostream out(rendered);
+  out << "[ ";
+  for (auto &elem : data) {
+    if (auto text = std::get_if<std::string>(&elem)) {
+      out << '"' << *text << '"';
+    } else if (auto unsigned_val = std::get_if<uint64_t>(&elem)) {
+      out << *unsigned_val << 'u';
+    } else if (auto signed_val = std::get_if<int64_t>(&elem)) {
+      out << *signed_val;
+    } else if (auto object_sp = std::get_if<ValueObjectSP>(&elem)) {
+      // A stack slot may hold an empty shared pointer.
+      if (object_sp->get())
+        out << "object(" << object_sp->get()->GetValueAsCString() << ')';
+      else
+        out << "null";
+    } else if (auto compiler_type = std::get_if<CompilerType>(&elem)) {
+      out << '(' << compiler_type->GetTypeName(true) << ')';
+    } else if (auto selector =
+                   std::get_if<FormatterBytecode::Selectors>(&elem)) {
+      out << toString(*selector);
+    }
+    // Every element, recognized or not, is followed by a separator.
+    out << ' ';
+  }
+  out << ']';
+  return rendered;
+}
+
+namespace FormatterBytecode {
+
+/// Implement the @format function.
+///
+/// Pops the format string from the top of \p data, parses it with
+/// llvm::formatv_object_base::parseFormatString, and pushes the formatted
+/// result. The arguments referenced by the format string are read in place
+/// from the top of the stack ("{0}" is the deepest of them) and are not popped
+/// by this function.
+///
+/// \return an error if the stack holds fewer elements than the format string
+/// references, success otherwise.
+static llvm::Error FormatImpl(DataStack &data) {
+  auto fmt = data.Pop<std::string>();
+  auto replacements =
+      llvm::formatv_object_base::parseFormatString(fmt, 0, false);
+  std::string s;
+  llvm::raw_string_ostream os(s);
+  // Replacement indices are zero-based, so a reference to "{N}" requires
+  // N + 1 arguments. Using the bare index would under-count by one and make
+  // the argument lookup below read past the end of the stack.
+  unsigned num_args = 0;
+  for (const auto &r : replacements)
+    if (r.Type == llvm::ReplacementType::Format)
+      num_args = std::max(num_args, r.Index + 1);
+
+  if (data.size() < num_args)
+    return llvm::createStringError("not enough arguments");
+
+  for (const auto &r : replacements) {
+    if (r.Type == llvm::ReplacementType::Literal) {
+      os << r.Spec;
+      continue;
+    }
+    using namespace llvm::support::detail;
+    // Borrow the element; it stays on the stack for the whole call.
+    const auto &arg = data[data.size() - num_args + r.Index];
+    auto format = [&](format_adapter &&adapter) {
+      llvm::FmtAlign Align(adapter, r.Where, r.Width, r.Pad);
+      Align.format(os, r.Options);
+    };
+
+    // std::get_if yields a pointer; dereference it so that the value, not the
+    // pointer itself, is handed to the format adapter.
+    if (auto s = std::get_if<std::string>(&arg))
+      format(build_format_adapter(*s));
+    else if (auto u = std::get_if<uint64_t>(&arg))
+      format(build_format_adapter(*u));
+    else if (auto i = std::get_if<int64_t>(&arg))
+      format(build_format_adapter(*i));
+    else if (auto valobj = std::get_if<ValueObjectSP>(&arg)) {
+      if (!valobj->get())
+        format(build_format_adapter("null object"));
+      else
+        format(build_format_adapter(valobj->get()->GetValueAsCString()));
+    } else if (auto type = std::get_if<CompilerType>(&arg))
+      format(build_format_adapter(type->GetDisplayTypeName()));
+    else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&arg))
+      format(build_format_adapter(toString(*sel)));
+  }
+  data.Push(s);
+  return llvm::Error::success();
+}
+
+/// Verify that the top element of the data stack \p data has the dynamic type
+/// described by \p type.
+///
+/// \return an error if \p data is empty or its last element holds a different
+/// alternative than the one \p type asks for; success otherwise. \c Any
+/// accepts every element.
+static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data,
+                             DataType type) {
+  if (data.size() < 1)
+    return llvm::createStringError("not enough elements on data stack");
+
+  auto &top = data.back();
+  switch (type) {
+  case Any:
+    return llvm::Error::success();
+  case String:
+    if (std::holds_alternative<std::string>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected String");
+  case UInt:
+    if (std::holds_alternative<uint64_t>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected UInt");
+  case Int:
+    if (std::holds_alternative<int64_t>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected Int");
+  case Object:
+    if (std::holds_alternative<ValueObjectSP>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected Object");
+  case Type:
+    if (std::holds_alternative<CompilerType>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected Type");
+  case Selector:
+    if (std::holds_alternative<Selectors>(top))
+      return llvm::Error::success();
+    return llvm::createStringError("expected Selector");
+  }
+  // A value outside the enumerators above is not rejected.
+  return llvm::Error::success();
+}
+
+/// Verify the two topmost elements of \p data: the top element must match
+/// \p type2 and the element directly below it must match \p type1.
+static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data,
+                             DataType type1, DataType type2) {
+  // Check the top of the stack first so that its error takes precedence.
+  llvm::Error top_error = TypeCheck(data, type2);
+  if (top_error)
+    return top_error;
+  llvm::ArrayRef<DataStackElement> below_top = data.drop_back();
+  return TypeCheck(below_top, type1);
+}
+
+/// Verify the three topmost elements of \p data. Types are listed from the
+/// deepest element to the top: the top element must match \p type3, the one
+/// below it \p type2, and the third from the top \p type1.
+static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data,
+                             DataType type1, DataType type2, DataType type3) {
+  if (auto error = TypeCheck(data, type3))
+    return error;
+  // The two-type overload expects (lower, upper), so the remaining types must
+  // be forwarded in their original order; swapping them would check the
+  // second and third elements against each other's expected type.
+  return TypeCheck(data.drop_back(1), type1, type2);
+}
+
+llvm::Error Interpret(std::vector<ControlStackElement> &control,
+ DataStack &data, Selectors sel) {
+ if (control.empty())
+ return llvm::Error::success();
+ // Since the only data types are single endian and ULEBs, the
+ // endianness should not matter.
+ llvm::DataExtractor cur_block(control.back(), true, 64);
+ llvm::DataExtractor::Cursor pc(0);
+
+ while (!control.empty()) {
+ /// Activate the top most block from the control stack.
+ auto activate_block = [&]() {
+ // Save the return address.
+ if (control.size() > 1)
+ control[control.size() - 2] = cur_block.getData().drop_front(pc.tell());
+ cur_block = llvm::DataExtractor(control.back(), true, 64);
+ if (pc)
+ pc = llvm::DataExtractor::Cursor(0);
+ };
+
+ /// Fetch the next byte in the instruction stream.
+ auto next_byte = [&]() -> uint8_t {
+ // At the end of the current block?
+ while (pc.tell() >= cur_block.size() && !control.empty()) {
+ if (control.size() == 1) {
+ control.pop_back();
+ return 0;
+ }
+ control.pop_back();
+ activate_block();
+ }
+
+ // Fetch the next instruction.
+ return cur_block.getU8(pc);
+ };
+
+ // Fetch the next opcode.
+ OpCodes opcode = (OpCodes)next_byte();
+ if (control.empty() || !pc)
+ return pc.takeError();
+
+ LLDB_LOGV(GetLog(LLDBLog::DataFormatters),
+ "[eval {0}] opcode={1}, control={2}, data={3}", toString(sel),
+ toString(opcode), control.size(), toString(data));
+
+
+ // Various shorthands to improve the readability of error handling.
+#define TYPE_CHECK(...) \
+ if (auto error = TypeCheck(data, __VA_ARGS__)) \
+ return error;
+
+ auto error = [&](const char *msg) {
+ return llvm::createStringError("{0} (opcode={1})", msg, toString(opcode).c_str());
+ };
+
+ switch (opcode) {
+ // Data stack manipulation.
+ case op_dup:
+ TYPE_CHECK(Any);
+ data.Push(data.back());
+ break;
+ case op_drop:
+ TYPE_CHECK(Any);
+ data.pop_back();
+ break;
+ case op_pick: {
+ TYPE_CHECK(UInt);
+ uint64_t idx = data.Pop<uint64_t>();
+ if (idx >= data.size())
+ return error("index out of bounds");
+ data.Push(data[idx]);
+ break;
+ }
+ case op_over:
+ TYPE_CHECK(Any, Any);
+ data.Push(data[data.size() - 2]);
+ break;
+ case op_swap: {
+ TYPE_CHECK(Any, Any);
+ auto x = data.PopAny();
+ auto y = data.PopAny();
+ data.Push(x);
+ data.Push(y);
+ break;
+ }
+ case op_rot: {
+ TYPE_CHECK(Any, Any, Any);
+ auto z = data.PopAny();
+ auto y = data.PopAny();
+ auto x = data.PopAny();
+ data.Push(z);
+ data.Push(x);
+ data.Push(y);
+ break;
+ }
+ // Control stack manipulation.
+ case op_begin: {
+ uint64_t length = cur_block.getULEB128(pc);
+ if (!pc)
+ return pc.takeError();
+ llvm::StringRef block = cur_block.getBytes(pc, length);
+ if (!pc)
+ return pc.takeError();
+ control.push_back(block);
+ break;
+ }
+ case op_if:
+ TYPE_CHECK(UInt);
+ if (data.Pop<uint64_t>() != 0) {
+ if (!cur_block.size())
+ return error("empty control stack");
+ activate_block();
+ }
+ break;
+ case op_ifelse:
+ TYPE_CHECK(UInt);
+ if (cur_block.size() < 2)
+ return error("empty control stack");
+ if (data.Pop<uint64_t>() == 0)
+ control[control.size()-2] = control.back();
+ control.pop_back();
+ activate_block();
+ break;
+ // Literals.
+ case op_lit_uint:
+ data.Push(cur_block.getULEB128(pc));
+ break;
+ case op_lit_int:
+ data.Push(cur_block.getSLEB128(pc));
+ break;
+ case op_lit_selector:
+ data.Push(Selectors(cur_block.getU8(pc)));
+ break;
+ case op_lit_string: {
+ uint64_t length = cur_block.getULEB128(pc);
+ llvm::StringRef bytes = cur_block.getBytes(pc, length);
+ data.Push(bytes.str());
+ break;
+ }
+ case op_as_uint: {
+ TYPE_CHECK(Int);
+ uint64_t casted;
+ int64_t val = data.Pop<int64_t>();
+ memcpy(&casted, &val, sizeof(val));
+ data.Push(casted);
+ break;
+ }
+ case op_as_int: {
+ TYPE_CHECK(UInt);
+ int64_t casted;
+ uint64_t val = data.Pop<uint64_t>();
+ memcpy(&casted, &val, sizeof(val));
+ data.Push(casted);
+ break;
+ }
+ case op_is_null: {
+ TYPE_CHECK(Object);
+ data.Push(data.Pop<ValueObjectSP>() ? 0ULL : 1ULL);
+ break;
+ }
+ // Arithmetic, logic, etc.
+#define BINOP_IMPL(OP, CHECK_ZERO) \
+ { \
+ TYPE_CHECK(Any, Any); \
+ auto y = data.PopAny(); \
+ if (std::holds_alternative<uint64_t>(y)) { \
+ if (CHECK_ZERO && !std::get<uint64_t>(y)) \
+ return error(#OP " by zero"); \
+ TYPE_CHECK(UInt); \
+ data.Push((uint64_t)(data.Pop<uint64_t>() OP std::get<uint64_t>(y))); \
+ } else if (std::holds_alternative<int64_t>(y)) { \
+ if (CHECK_ZERO && !std::get<int64_t>(y)) \
+ return error(#OP " by zero"); \
+ TYPE_CHECK(Int); \
+ data.Push((int64_t)(data.Pop<int64_t>() OP std::get<int64_t>(y))); \
+ } else \
+ return error("unsupported data types"); \
+ }
+#define BINOP(OP) BINOP_IMPL(OP, false)
+#define BINOP_CHECKZERO(OP) BINOP_IMPL(OP, true)
+ case op_plus:
+ BINOP(+);
+ break;
+ case op_minus:
+ BINOP(-);
+ break;
+ case op_mul:
+ BINOP(*);
+ break;
+ case op_div:
+ BINOP_CHECKZERO(/);
+ break;
+ case op_mod:
+ BINOP_CHECKZERO(%);
+ break;
+ case op_shl:
+#define SHIFTOP(OP) \
+ { \
+ TYPE_CHECK(Any, Any); \
+ if (std::holds_alternative<uint64_t>(data.back())) { \
+ uint64_t y = data.Pop<uint64_t>(); \
+ TYPE_CHECK(UInt); \
+ uint64_t x = data.Pop<uint64_t>(); \
+ if (y > 64) \
+ return error("shift out of bounds"); \
+ data.Push(x OP y); \
+ } else if (std::holds_alternative<int64_t>(data.back())) { \
+ uint64_t y = data.Pop<int64_t>(); \
+ TYPE_CHECK(Int); \
+ uint64_t x = data.Pop<int64_t>(); \
+ if (y > 64) \
+ return error("shift out of bounds"); \
+ if (y < 0) \
+ return error("shift out of bounds"); \
+ data.Push(x OP y); \
+ } else \
+ return error("unsupported data types"); \
+ }
+ SHIFTOP(<<);
+ break;
+ case op_shr:
+ SHIFTOP(<<);
+ break;
+ case op_and:
+ BINOP(&);
+ break;
+ case op_or:
+ BINOP(|);
+ break;
+ case op_xor:
+ BINOP(^);
+ break;
+ case op_not:
+ TYPE_CHECK(UInt);
+ data.Push(~data.Pop<uint64_t>());
+ break;
+ case op_eq:
+ BINOP(==);
+ break;
+ case op_neq:
+ BINOP(!=);
+ break;
+ case op_lt:
+ BINOP(<);
+ break;
+ case op_gt:
+ BINOP(>);
+ break;
+ case op_le:
+ BINOP(<=);
+ break;
+ case op_ge:
+ BINOP(>=);
+ break;
+ case op_call: {
+ TYPE_CHECK(Selector);
+ Selectors sel = data.Pop<Selectors>();
+
+ // Shorthand to improve readability.
+#define POP_VALOBJ(VALOBJ) \
+ auto VALOBJ = data.Pop<ValueObjectSP>(); \
+ if (!VALOBJ) \
+ return error("null object");
+
+ auto sel_error = [&](const char *msg) {
+ return llvm::createStringError("{0} (opcode={1}, selector={2})", msg,
+ toString(opcode).c_str(),
+ toString(sel).c_str());
+ };
+
+ switch (sel) {
+ case sel_summary: {
+ TYPE_CHECK(Object);
+ POP_VALOBJ(valobj);
+ const char *summary = valobj->GetSummaryAsCString();
+ data.Push(summary ? std::string(valobj->GetSummaryAsCString())
+ : std::string());
+ break;
+ }
+ case sel_get_num_children: {
+ TYPE_CHECK(Object);
+ POP_VALOBJ(valobj);
+ auto result = valobj->GetNumChildren();
+ if (!result)
+ return result.takeError();
+ data.Push((uint64_t)*result);
+ break;
+ }
+ case sel_get_child_at_index: {
+ TYPE_CHECK(Object, UInt);
+ auto index = data.Pop<uint64_t>();
+ POP_VALOBJ(valobj);
+ data.Push(valobj->GetChildAtIndex(index));
+ break;
+ }
+ case sel_get_child_with_name: {
+ TYPE_CHECK(Object, String);
+ auto name = data.Pop<std::string>();
+ POP_VALOBJ(valobj);
+ data.Push(valobj->GetChildMemberWithName(name));
+ break;
+ }
+ c...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114333
More information about the lldb-commits
mailing list