[Lldb-commits] [lldb] [LLDB] Add AST node classes, functions, etc. for Data Inspection Lang… (PR #95738)

via lldb-commits lldb-commits at lists.llvm.org
Sun Jun 16 22:47:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: None (cmtice)

<details>
<summary>Changes</summary>

[LLDB] Add AST node classes, functions, etc. for Data Inspection Language (DIL).

The Data Inspection Language (DIL), described in
https://discourse.llvm.org/t/rfc-data-inspection-language/69893 includes its own parser and expression evaluator.

This change defines the AST nodes, classes and functions which are used by the DIL parser and expression evaluator. It also adds the .ebnf file documenting the (current) expression language handled by the DIL parser and expression evaluator.

---

Patch is 50.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95738.diff


4 Files Affected:

- (added) lldb/docs/dil-expr-lang.ebnf (+165) 
- (added) lldb/include/lldb/Core/DILAST.h (+690) 
- (modified) lldb/source/Core/CMakeLists.txt (+1) 
- (added) lldb/source/Core/DILAST.cpp (+568) 


``````````diff
diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
new file mode 100644
index 0000000000000..40c678c25cda5
--- /dev/null
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -0,0 +1,165 @@
+(* LLDB Debug Expressions, a subset of C++ *)
+(* Inspired by https://www.nongnu.org/hcb *)
+
+expression = assignment_expression ;
+
+assignment_expression = conditional_expression
+                      | logical_or_expression assignment_operator assignment_expression ;
+
+assignment_operator = "="
+                    | "*="
+                    | "/="
+                    | "%="
+                    | "+="
+                    | "-="
+                    | ">>="
+                    | "<<="
+                    | "&="
+                    | "^="
+                    | "|=" ;
+
+conditional_expression = logical_or_expression
+                       | logical_or_expression "?" expression ":" assignment_expression ;
+
+logical_or_expression = logical_and_expression {"||" logical_and_expression} ;
+
+logical_and_expression = inclusive_or_expression {"&&" inclusive_or_expression} ;
+
+inclusive_or_expression = exclusive_or_expression {"|" exclusive_or_expression} ;
+
+exclusive_or_expression = and_expression {"^" and_expression} ;
+
+and_expression = equality_expression {"&" equality_expression} ;
+
+equality_expression = relational_expression {"==" relational_expression}
+                    | relational_expression {"!=" relational_expression} ;
+
+relational_expression = shift_expression {"<" shift_expression}
+                      | shift_expression {">" shift_expression}
+                      | shift_expression {"<=" shift_expression}
+                      | shift_expression {">=" shift_expression} ;
+
+shift_expression = additive_expression {"<<" additive_expression}
+                 | additive_expression {">>" additive_expression} ;
+
+additive_expression = multiplicative_expression {"+" multiplicative_expression}
+                    | multiplicative_expression {"-" multiplicative_expression} ;
+
+multiplicative_expression = cast_expression {"*" cast_expression}
+                          | cast_expression {"/" cast_expression}
+                          | cast_expression {"%" cast_expression} ;
+
+cast_expression = unary_expression
+                | "(" type_id ")" cast_expression ;
+
+unary_expression = postfix_expression
+                 | "++" cast_expression
+                 | "--" cast_expression
+                 | unary_operator cast_expression
+                 | "sizeof" unary_expression
+                 | "sizeof" "(" type_id ")" ;
+
+unary_operator = "*" | "&" | "+" | "-" | "!" | "~" ;
+
+postfix_expression = primary_expression
+                   | postfix_expression "[" expression "]"
+                   | postfix_expression "." id_expression
+                   | postfix_expression "->" id_expression
+                   | postfix_expression "++"
+                   | postfix_expression "--"
+                   | "static_cast" "<" type_id ">" "(" expression ")"
+                   | "dynamic_cast" "<" type_id ">" "(" expression ")"
+                   | "reinterpret_cast" "<" type_id ">" "(" expression ")" ;
+
+primary_expression = numeric_literal
+                   | boolean_literal
+                   | pointer_literal
+                   | id_expression
+                   | "this"
+                   | "(" expression ")"
+                   | builtin_func ;
+
+type_id = type_specifier_seq [abstract_declarator] ;
+
+type_specifier_seq = type_specifier [type_specifier_seq] ;
+
+type_specifier = simple_type_specifier
+               | cv_qualifier ;
+
+simple_type_specifier = ["::"] [nested_name_specifier] type_name
+                      | "char"
+                      | "char16_t"
+                      | "char32_t"
+                      | "wchar_t"
+                      | "bool"
+                      | "short"
+                      | "int"
+                      | "long"
+                      | "signed"
+                      | "unsigned"
+                      | "float"
+                      | "double"
+                      | "void" ;
+
+nested_name_specifier = type_name "::"
+                      | namespace_name "::"
+                      | nested_name_specifier identifier "::"
+                      | nested_name_specifier simple_template_id "::" ;
+
+type_name = class_name
+          | enum_name
+          | typedef_name
+          | simple_template_id ;
+
+class_name = identifier ;
+
+enum_name = identifier ;
+
+typedef_name = identifier ;
+
+simple_template_id = template_name "<" [template_argument_list] ">" ;
+
+template_name = identifier ;
+
+template_argument_list = template_argument
+                       | template_argument_list "," template_argument ;
+
+template_argument = type_id
+                  | numeric_literal
+                  | id_expression ;
+
+namespace_name = identifier ;
+
+cv_qualifier = "const" | "volatile" ;
+
+cv_qualifier_seq = cv_qualifier [cv_qualifier_seq] ;
+
+abstract_declarator = ptr_operator [abstract_declarator] ;
+
+ptr_operator = "*" [cv_qualifier_seq]
+             | "&" ;
+
+id_expression = unqualified_id
+              | qualified_id ;
+
+unqualified_id = identifier ;
+
+qualified_id = ["::"] [nested_name_specifier] unqualified_id
+             | ["::"] identifier ;
+
+identifier = ? clang::tok::identifier ? ;
+
+numeric_literal = ? clang::tok::numeric_constant ? ;
+
+boolean_literal = "true" | "false" ;
+
+pointer_literal = "nullptr" ;
+
+builtin_func = builtin_func_name "(" [builtin_func_argument_list] ")" ;
+
+builtin_func_name = "__log2" ;
+
+builtin_func_argument_list = builtin_func_argument
+                           | builtin_func_argument_list "," builtin_func_argument ;
+
+builtin_func_argument = expression ;
diff --git a/lldb/include/lldb/Core/DILAST.h b/lldb/include/lldb/Core/DILAST.h
new file mode 100644
index 0000000000000..acb9da088eda8
--- /dev/null
+++ b/lldb/include/lldb/Core/DILAST.h
@@ -0,0 +1,690 @@
+//===-- DILAST.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_DIL_AST_H_
+#define LLDB_DIL_AST_H_
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include "lldb/Core/ValueObject.h"
+#include "lldb/Symbol/Type.h"
+#include "lldb/Symbol/TypeList.h"
+#include "lldb/Target/LanguageRuntime.h"
+#include "lldb/Utility/ConstString.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+
+namespace lldb_private {
+
+/// Struct to hold information about member fields. Used by the parser for the
+/// Data Inspection Language (DIL).
+struct DILMemberInfo {
+  std::optional<std::string> name;
+  CompilerType type;
+  bool is_bitfield;
+  uint32_t bitfield_size_in_bits;
+  bool is_synthetic;
+  bool is_dynamic;
+  lldb::ValueObjectSP val_obj_sp;
+
+  explicit operator bool() const { return type.IsValid(); }
+};
+
+/// This determines if the type is a shared, unique or weak pointer, either
+/// from libstdc++ or libc++.
+bool IsSmartPtrType(CompilerType type);
+
+/// Finds the member field with the given name and type, stores the child index
+/// corresponding to the field in the idx vector and returns a DILMemberInfo
+/// struct with appropriate information about the field.
+DILMemberInfo GetFieldWithNameIndexPath(lldb::ValueObjectSP lhs_val_sp,
+                                        CompilerType type,
+                                        const std::string &name,
+                                        std::vector<uint32_t> *idx,
+                                        CompilerType empty_type,
+                                        bool use_synthetic, bool is_dynamic);
+
+std::tuple<DILMemberInfo, std::vector<uint32_t>>
+GetMemberInfo(lldb::ValueObjectSP lhs_val_sp, CompilerType type,
+              const std::string &name, bool use_synthetic);
+
+/// Get the appropriate ValueObjectSP, consulting the use_dynamic and
+/// use_synthetic options passed, acquiring the process & target locks if
+/// appropriate.
+lldb::ValueObjectSP
+DILGetSPWithLock(lldb::ValueObjectSP valobj_sp,
+                 lldb::DynamicValueType use_dynamic = lldb::eNoDynamicValues,
+                 bool use_synthetic = false);
+
+/// The various types of DIL AST nodes (used by the DIL parser).
+enum class DILNodeKind {
+  kDILErrorNode,
+  kLiteralNode,
+  kIdentifierNode,
+  kSizeOfNode,
+  kBuiltinFunctionCallNode,
+  kCStyleCastNode,
+  kCxxStaticCastNode,
+  kCxxReinterpretCastNode,
+  kMemberOfNode,
+  kArraySubscriptNode,
+  kBinaryOpNode,
+  kUnaryOpNode,
+  kTernaryOpNode,
+  kSmartPtrToPtrDecay
+};
+
+/// The C-Style casts allowed by DIL.
+enum class CStyleCastKind {
+  kArithmetic,
+  kEnumeration,
+  kPointer,
+  kNullptr,
+  kReference,
+};
+
+/// The Cxx static casts allowed by DIL.
+enum class CxxStaticCastKind {
+  kNoOp,
+  kArithmetic,
+  kEnumeration,
+  kPointer,
+  kNullptr,
+  kBaseToDerived,
+  kDerivedToBase,
+};
+
+/// The binary operators recognized by DIL.
+enum class BinaryOpKind {
+  Mul,       // "*"
+  Div,       // "/"
+  Rem,       // "%"
+  Add,       // "+"
+  Sub,       // "-"
+  Shl,       // "<<"
+  Shr,       // ">>"
+  LT,        // "<"
+  GT,        // ">"
+  LE,        // "<="
+  GE,        // ">="
+  EQ,        // "=="
+  NE,        // "!="
+  And,       // "&"
+  Xor,       // "^"
+  Or,        // "|"
+  LAnd,      // "&&"
+  LOr,       // "||"
+  Assign,    // "="
+  MulAssign, // "*="
+  DivAssign, // "/="
+  RemAssign, // "%="
+  AddAssign, // "+="
+  SubAssign, // "-="
+  ShlAssign, // "<<="
+  ShrAssign, // ">>="
+  AndAssign, // "&="
+  XorAssign, // "^="
+  OrAssign,  // "|="
+};
+
+/// The Unary operators recognized by DIL.
+enum class UnaryOpKind {
+  PostInc, // "++"
+  PostDec, // "--"
+  PreInc,  // "++"
+  PreDec,  // "--"
+  AddrOf,  // "&"
+  Deref,   // "*"
+  Plus,    // "+"
+  Minus,   // "-"
+  Not,     // "~"
+  LNot,    // "!"
+};
+
+/// Helper functions for DIL AST node parsing.
+
+/// Translates clang tokens to BinaryOpKind.
+BinaryOpKind
+clang_token_kind_to_binary_op_kind(clang::tok::TokenKind token_kind);
+
+/// Returns bool indicating whether or not the input kind is an assignment.
+bool binary_op_kind_is_comp_assign(BinaryOpKind kind);
+
+/// Given a string representing a type, returns the CompilerType corresponding
+/// to the named type, if it exists.
+CompilerType
+ResolveTypeByName(const std::string &name,
+                  std::shared_ptr<ExecutionContextScope> ctx_scope);
+
+/// Quick lookup to check if a type name already exists in a
+/// name-to-CompilerType map the DIL parser keeps of previously found
+/// name/type pairs.
+bool IsContextVar(const std::string &name);
+
+/// Checks to see if the CompilerType is a Smart Pointer (shared, unique, weak)
+/// or not. Only applicable for C++, which is why this is here and not part of
+/// the CompilerType class.
+bool IsSmartPtrType(CompilerType type);
+
+/// Class used to store & manipulate information about identifiers.
+class IdentifierInfo {
+private:
+  using MemberPath = std::vector<uint32_t>;
+  using IdentifierInfoPtr = std::unique_ptr<IdentifierInfo>;
+
+public:
+  enum class Kind {
+    kValue,
+    kContextArg,
+    kMemberPath,
+    kThisKeyword,
+  };
+
+  static IdentifierInfoPtr FromValue(lldb::ValueObjectSP value_sp) {
+    CompilerType type;
+    lldb::ValueObjectSP value = DILGetSPWithLock(value_sp);
+    if (value)
+      type = value->GetCompilerType();
+    return IdentifierInfoPtr(new IdentifierInfo(Kind::kValue, type, value, {}));
+  }
+
+  static IdentifierInfoPtr FromContextArg(CompilerType type) {
+    lldb::ValueObjectSP empty_value;
+    return IdentifierInfoPtr(
+        new IdentifierInfo(Kind::kContextArg, type, empty_value, {}));
+  }
+
+  static IdentifierInfoPtr FromMemberPath(CompilerType type, MemberPath path) {
+    lldb::ValueObjectSP empty_value;
+    return IdentifierInfoPtr(new IdentifierInfo(Kind::kMemberPath, type,
+                                                empty_value, std::move(path)));
+  }
+
+  static IdentifierInfoPtr FromThisKeyword(CompilerType type) {
+    lldb::ValueObjectSP empty_value;
+    return IdentifierInfoPtr(
+        new IdentifierInfo(Kind::kThisKeyword, type, empty_value, {}));
+  }
+
+  Kind kind() const { return m_kind; }
+  lldb::ValueObjectSP value() const { return m_value; }
+  const MemberPath &path() const { return m_path; }
+
+  CompilerType GetType() { return m_type; }
+  bool IsValid() const { return m_type.IsValid(); }
+
+  IdentifierInfo(Kind kind, CompilerType type, lldb::ValueObjectSP value,
+                 MemberPath path)
+      : m_kind(kind), m_type(type), m_value(std::move(value)),
+        m_path(std::move(path)) {}
+
+private:
+  Kind m_kind;
+  CompilerType m_type;
+  lldb::ValueObjectSP m_value;
+  MemberPath m_path;
+};
+
+/// Given the name of an identifier (variable name, member name, type name,
+/// etc.), find the ValueObject for that name (if it exists) and create and
+/// return an IdentifierInfo object containing all the relevant information
+/// about that object (for DIL parsing and evaluating).
+std::unique_ptr<IdentifierInfo> LookupIdentifier(
+    const std::string &name, std::shared_ptr<ExecutionContextScope> ctx_scope,
+    lldb::DynamicValueType use_dynamic, CompilerType *scope_ptr = nullptr);
+
+/// Forward declaration, for use in DIL AST nodes. Definition is at the very
+/// end of this file.
+class DILVisitor;
+
+/// The rest of the classes in this file, except for the DILVisitor class at the
+/// very end, define all the types of AST nodes used by the DIL parser and
+/// expression evaluator. The DIL parser parses the input string and creates the
+/// AST parse tree from the AST nodes. The resulting AST node tree gets passed
+/// to the DIL expression evaluator, which evaluates the DIL AST nodes and
+/// creates/returns a ValueObjectSP containing the result.
+
+/// Base class for AST nodes used by the Data Inspection Language (DIL) parser.
+/// All of the specialized types of AST nodes inherit from this (abstract) base
+/// class.
+class DILASTNode {
+public:
+  DILASTNode(clang::SourceLocation location) : location_(location) {}
+  virtual ~DILASTNode() {}
+
+  virtual void Accept(DILVisitor *v) const = 0;
+
+  virtual bool is_error() const { return false; };
+  virtual bool is_rvalue() const = 0;
+  virtual bool is_bitfield() const { return false; };
+  virtual bool is_context_var() const { return false; };
+  virtual bool is_literal_zero() const { return false; }
+  virtual uint32_t bitfield_size() const { return 0; }
+  virtual CompilerType result_type() const = 0;
+
+  virtual DILNodeKind what_am_i() const = 0;
+
+  clang::SourceLocation location() const { return location_; }
+
+  // The expression result type, but dereferenced in case it's a reference. This
+  // is for convenience, since for the purposes of the semantic analysis only
+  // the dereferenced type matters.
+  CompilerType result_type_deref() const;
+
+private:
+  clang::SourceLocation location_;
+};
+
+using ParseResult = std::unique_ptr<DILASTNode>;
+
+class DILErrorNode : public DILASTNode {
+public:
+  DILErrorNode(CompilerType empty_type)
+      : DILASTNode(clang::SourceLocation()), m_empty_type(empty_type) {}
+  void Accept(DILVisitor *v) const override;
+  bool is_error() const override { return true; }
+  bool is_rvalue() const override { return false; }
+  CompilerType result_type() const override { return m_empty_type; }
+  CompilerType result_type_real() const { return m_empty_type; }
+  DILNodeKind what_am_i() const override { return DILNodeKind::kDILErrorNode; }
+
+private:
+  CompilerType m_empty_type;
+};
+
+class LiteralNode : public DILASTNode {
+public:
+  template <typename ValueT>
+  LiteralNode(clang::SourceLocation location, CompilerType type, ValueT &&value,
+              bool is_literal_zero)
+      : DILASTNode(location), m_type(type),
+        m_value(std::forward<ValueT>(value)),
+        m_is_literal_zero(is_literal_zero) {}
+
+  void Accept(DILVisitor *v) const override;
+  bool is_rvalue() const override { return true; }
+  bool is_literal_zero() const override { return m_is_literal_zero; }
+  CompilerType result_type() const override { return m_type; }
+  DILNodeKind what_am_i() const override { return DILNodeKind::kLiteralNode; }
+
+  template <typename ValueT> ValueT value() const {
+    return std::get<ValueT>(m_value);
+  }
+
+  auto value() const { return m_value; }
+
+private:
+  CompilerType m_type;
+  std::variant<llvm::APInt, llvm::APFloat, bool, std::vector<char>> m_value;
+  bool m_is_literal_zero;
+};
+
+class IdentifierNode : public DILASTNode {
+public:
+  IdentifierNode(clang::SourceLocation location, std::string name,
+                 std::unique_ptr<IdentifierInfo> identifier, bool is_rvalue,
+                 bool is_context_var)
+      : DILASTNode(location), m_is_rvalue(is_rvalue),
+        m_is_context_var(is_context_var), m_name(std::move(name)),
+        m_identifier(std::move(identifier)) {}
+
+  void Accept(DILVisitor *v) const override;
+  bool is_rvalue() const override { return m_is_rvalue; }
+  bool is_context_var() const override { return m_is_context_var; };
+  CompilerType result_type() const override { return m_identifier->GetType(); }
+  DILNodeKind what_am_i() const override {
+    return DILNodeKind::kIdentifierNode;
+  }
+
+  std::string name() const { return m_name; }
+  const IdentifierInfo &info() const { return *m_identifier; }
+
+private:
+  bool m_is_rvalue;
+  bool m_is_context_var;
+  std::string m_name;
+  std::unique_ptr<IdentifierInfo> m_identifier;
+};
+
+class SizeOfNode : public DILASTNode {
+public:
+  SizeOfNode(clang::SourceLocation location, CompilerType type,
+             CompilerType operand)
+      : DILASTNode(location), m_type(type), m_operand(operand) {}
+
+  void Accept(DILVisitor *v) const override;
+  bool is_rvalue() const override { return true; }
+  CompilerType result_type() const override { return m_type; }
+  DILNodeKind what_am_i() const override { return DILNodeKind::kSizeOfNode; }
+
+  CompilerType operand() const { return m_operand; }
+
+private:
+  CompilerType m_type;
+  CompilerType m_operand;
+};
+
+class BuiltinFunctionCallNode : public DILASTNode {
+public:
+  BuiltinFunctionCallNode(clang::SourceLocation location,
+                          CompilerType result_type, std::string name,
+                          std::vector<ParseResult> arguments)
+      : DILASTNode(location), m_result_type(result_type),
+        m_name(std::move(name)), m_arguments(std::move(arguments)) {}
+
+  void Accept(DILVisitor *v) const override;
+  bool is_rvalue() const override { return true; }
+  CompilerType result_type() const override { return m_result_type; }
+  DILNodeKind what_am_i() const override {
+    return DILNodeKind::kBuiltinFunctionCallNode;
+  }
+
+  std::string name() const { return m_name; }
+  const std::vector<ParseResult> &arguments() const { return m_arguments; };
+
+private:
+  CompilerType m_result_type;
+  std::string m_name;
+  std::vector<ParseResult> m_arguments;
+};
+
+class CStyleCastNode : public DILASTNode {
+public:
+  CStyleCastNode(clang::SourceLocation location, CompilerType type,
+                 ParseResult rhs, CStyleCastKind kind)
+      : DILASTNode(location), m_type(type), m_rhs(std::move(rhs)),
+        m_kind(kind) {}
+
+  void Accept(DILVisitor *v) const override;
+  bool is_rvalue() const override {
+    return m_kind != CStyleCastKind::kReference;
+  }
+  CompilerType result_type() const override { return m_type; }
+  DILNodeKind what_am_i() const override {
+    return DILNodeKind::kCStyleCastNode;
+  }
+
+  CompilerType type() const { return m_type; }
+  DILASTNode *rhs() const { return m_rhs.get(); }
+  CStyleCastKind kind() const { return m_kind; }
+
+private:
+  CompilerType m_type;
+  ParseResult m_rhs;
+  CStyleCastKind m_kind;
+};
+
+class CxxStaticCastNode : public DILASTNode {
+public:
+  CxxStaticCastNode(clang::SourceLocation location, CompilerType type,
+                    ParseResult rhs, CxxStaticCastKind ki...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/95738


More information about the lldb-commits mailing list