[Lldb-commits] [lldb] [LLDB] Add DIL code for handling plain variable names. (PR #120971)

Wed Apr 2 09:20:00 PDT 2025

================
@@ -0,0 +1,283 @@
+//===-- DILParser.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the recursive descent parser for the Data Inspection
+// Language (DIL), and its helper functions, which will eventually underlie the
+// 'frame variable' command. The language that this parser recognizes is
+// described in lldb/docs/dil-expr-lang.ebnf
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/ValueObject/DILParser.h"
+#include "lldb/Target/ExecutionContextScope.h"
+#include "lldb/Utility/DiagnosticsRendering.h"
+#include "lldb/ValueObject/DILAST.h"
+#include "lldb/ValueObject/DILEval.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatAdapters.h"
+#include <cstdlib>
+#include <limits.h>
+#include <memory>
+#include <sstream>
+#include <string>
+
+namespace lldb_private::dil {
+
+DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr,
+                                       const std::string &message, uint32_t loc,
+                                       uint16_t err_len)
+    : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
+  DiagnosticDetail::SourceLocation sloc = {
+      FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
+      err_len,    false,      /*in_user_input=*/true};
+  std::string rendered_msg =
+      llvm::formatv("<user expression 0>:1:{0}: {1}\n   1 | {2}\n     | ^",
+                    loc + 1, message, expr);
+  DiagnosticDetail detail;
+  detail.source_location = sloc;
+  detail.severity = lldb::eSeverityError;
+  detail.message = message;
+  detail.rendered = rendered_msg;
+  m_detail = std::move(detail);
+}
+
+llvm::Expected<ASTNodeUP>
+DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
+                 std::shared_ptr<StackFrame> frame_sp,
+                 lldb::DynamicValueType use_dynamic, bool use_synthetic,
+                 bool fragile_ivar, bool check_ptr_vs_member) {
+  Status lldb_error;
+  // Cannot declare an llvm::Error without initializing it to something, because
+  // llvm::Error::Error() constructor is protected. If there's a better way to
+  // handle this, please let me know.
+  llvm::Error error(lldb_error.takeError());
+  DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
+                   fragile_ivar, check_ptr_vs_member, error);
+
+  ASTNodeUP node_up = parser.Run();
+
+  if (error)
+    return error;
+
+  return node_up;
+}
+
+DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
+                     std::shared_ptr<StackFrame> frame_sp,
+                     lldb::DynamicValueType use_dynamic, bool use_synthetic,
+                     bool fragile_ivar, bool check_ptr_vs_member,
+                     llvm::Error &error)
+    : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
+      m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
+      m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
+      m_check_ptr_vs_member(check_ptr_vs_member) {}
+
+ASTNodeUP DILParser::Run() {
+  ASTNodeUP expr = ParseExpression();
+
+  Expect(Token::Kind::eof);
+
+  return expr;
+}
+
+// Parse an expression.
+//
+//  expression:
+//    primary_expression
+//
+ASTNodeUP DILParser::ParseExpression() { return ParsePrimaryExpression(); }
+
+// Parse a primary_expression.
+//
+//  primary_expression:
+//    id_expression
+//    "(" expression ")"
+//
+ASTNodeUP DILParser::ParsePrimaryExpression() {
+  if (CurToken().IsOneOf({Token::coloncolon, Token::identifier})) {
+    // Save the source location for the diagnostics message.
+    uint32_t loc = CurToken().GetLocation();
+    auto identifier = ParseIdExpression();
+
+    return std::make_unique<IdentifierNode>(loc, identifier, m_use_dynamic,
+                                            m_ctx_scope);
+  }
+
+  if (CurToken().Is(Token::l_paren)) {
+    m_dil_lexer.Advance();
+    auto expr = ParseExpression();
+    Expect(Token::r_paren);
+    m_dil_lexer.Advance();
+    return expr;
+  }
+
+  BailOut(ErrorCode::kInvalidExpressionSyntax,
+          llvm::formatv("Unexpected token: {0}", CurToken()),
+          CurToken().GetLocation());
+  return std::make_unique<ErrorNode>();
+}
+
+// Parse nested_name_specifier.
+//
+//  nested_name_specifier:
+//    type_name "::"
+//    namespace_name "::"
+//    nested_name_specifier identifier "::"
+//
+std::string DILParser::ParseNestedNameSpecifier() {
+  // The first token in nested_name_specifier is always an identifier, or
+  // '(anonymous namespace)'.
+  if (CurToken().IsNot(Token::identifier) && CurToken().IsNot(Token::l_paren))
+    return "";
+
+  // Anonymous namespaces need to be treated specially: They are represented
+  // the the string '(anonymous namespace)', which has a space in it (throwing
+  // off normal parsing) and is not actually proper C++> Check to see if we're
+  // looking at '(anonymous namespace)::...'
+  if (CurToken().Is(Token::l_paren)) {
+    // Look for all the pieces, in order:
+    // l_paren 'anonymous' 'namespace' r_paren coloncolon
+    if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
+        (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
+        m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
+        (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
+        m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
+        m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
+      m_dil_lexer.Advance(4);
+
+      assert(
+          (CurToken().Is(Token::identifier) || CurToken().Is(Token::l_paren)) &&
+          "Expected an identifier or anonymous namespace, but not found.");
+      // Continue parsing the nested_namespace_specifier.
+      std::string identifier2 = ParseNestedNameSpecifier();
+      if (identifier2.empty()) {
+        Expect(Token::identifier);
+        identifier2 = CurToken().GetSpelling();
+        m_dil_lexer.Advance();
+      }
+      return "(anonymous namespace)::" + identifier2;
+    }
+
+    return "";
+  } // end of special handling for '(anonymous namespace)'
+
+  // If the next token is scope ("::"), then this is indeed a
+  // nested_name_specifier
+  if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
+    // This nested_name_specifier is a single identifier.
+    std::string identifier = CurToken().GetSpelling();
+    m_dil_lexer.Advance(1);
+    Expect(Token::coloncolon);
+    m_dil_lexer.Advance();
+    // Continue parsing the nested_name_specifier.
+    return identifier + "::" + ParseNestedNameSpecifier();
+  }
+
+  return "";
+}
+
+// Parse an id_expression.
+//
+//  id_expression:
+//    unqualified_id
+//    qualified_id
+//
+//  qualified_id:
+//    ["::"] [nested_name_specifier] unqualified_id
+//    ["::"] identifier
+//
+//  identifier:
+//    ? Token::identifier ?
+//
+std::string DILParser::ParseIdExpression() {
+  // Try parsing optional global scope operator.
+  bool global_scope = false;
+  if (CurToken().Is(Token::coloncolon)) {
+    global_scope = true;
+    m_dil_lexer.Advance();
+  }
+
+  // Try parsing optional nested_name_specifier.
+  auto nested_name_specifier = ParseNestedNameSpecifier();
+
+  // If nested_name_specifier is present, then it's qualified_id production.
+  // Follow the first production rule.
+  if (!nested_name_specifier.empty()) {
+    // Parse unqualified_id and construct a fully qualified id expression.
+    auto unqualified_id = ParseUnqualifiedId();
+
+    return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
+                         nested_name_specifier, unqualified_id);
+  }
+
+  // No nested_name_specifier, but with global scope -- this is also a
+  // qualified_id production. Follow the second production rule.
+  if (global_scope) {
+    Expect(Token::identifier);
+    std::string identifier = CurToken().GetSpelling();
+    m_dil_lexer.Advance();
+    return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
+  }
+
+  // This is unqualified_id production.
+  return ParseUnqualifiedId();
+}
+
+// Parse an unqualified_id.
+//
+//  unqualified_id:
+//    identifier
+//
+//  identifier:
+//    ? Token::identifier ?
+//
+std::string DILParser::ParseUnqualifiedId() {
+  Expect(Token::identifier);
+  std::string identifier = CurToken().GetSpelling();
+  m_dil_lexer.Advance();
+  return identifier;
+}
+
+// Must somehow convert the DILDiagnosticError to a lldb::Status, since *that*
+// is was must be returned to the  place where CommandObjectFrame::DoExecute
+// calls GetVariableValueForExpressionPath (at the moment).
+Status GetStatusError(DILDiagnosticError dil_error) {
+  return Status(dil_error.GetDetails()[0].message);
+}
+
+void DILParser::BailOut(ErrorCode code, const std::string &error,
+                        uint32_t loc) {
+  if (m_error)
+    // If error is already set, then the parser is in the "bail-out" mode. Don't
+    // do anything and keep the original error.
+    return;
+
+  m_error = llvm::make_error<DILDiagnosticError>(
+      m_input_expr, error, loc, CurToken().GetSpelling().length());
----------------
cmtice wrote:

I do expect the error to always be on the current token, but we do need the `loc` parameter: That is supposed to indicate the starting position of the error (the 'bad' token) within the overall input expression.  The current token does not have this information. It might be able to tell which token it is (first, second, etc.), but it won't know the lengths of each of the preceding tokens (plus any spaces between them), so it won't know where it's starting position is in the overall input string.

If  you would prefer, I *can* update this BailOut function to take the length-to-underline (currently `CurToken().GetSpelling().Length()`) as an input parameter that it then passes to DILDiagnosticError. I *think* you indicated that you would find that acceptable?

https://github.com/llvm/llvm-project/pull/120971