[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)
Pavel Labath via lldb-commits
lldb-commits at lists.llvm.org
Mon Jan 20 02:14:38 PST 2025
================
@@ -0,0 +1,156 @@
+//===-- DILLexer.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <limits.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+ coloncolon,
+ eof,
+ identifier,
+ invalid,
+ kw_namespace,
+ l_paren,
+ none,
+ r_paren,
+ unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+ DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+ : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+ DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+ void setKind(dil::TokenKind kind) { m_kind = kind; }
+ dil::TokenKind getKind() const { return m_kind; }
+
+ std::string getSpelling() const { return m_spelling; }
+
+ uint32_t getLength() const { return m_spelling.size(); }
+
+ bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+ bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+ bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+ return is(kind1) || is(kind2);
+ }
+
+ template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+ return is(kind) || isOneOf(Ks...);
+ }
+
+ uint32_t getLocation() const { return m_start_pos; }
+
+ void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+ m_kind = kind;
+ m_spelling = spelling;
+ m_start_pos = start;
+ }
+
+ static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+ dil::TokenKind m_kind;
+ std::string m_spelling;
+ uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+ DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+ m_cur_pos = m_expr.begin();
+ // Use UINT_MAX to indicate invalid/uninitialized value.
+ m_tokens_idx = UINT_MAX;
+ }
+
+ bool Lex(DILToken &result, bool look_ahead = false);
+
+ bool Is_Word(std::string::iterator start, uint32_t &length);
+
+ uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+ /// Update 'result' with the other paremeter values, create a
+ /// duplicate token, and push the duplicate token onto the vector of
+ /// lexed tokens.
+ void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+ /// Return the lexed token N+1 positions ahead of the 'current' token
+ /// being handled by the DIL parser.
+ const DILToken &LookAhead(uint32_t N);
+
+ const DILToken &AcceptLookAhead(uint32_t N);
----------------
labath wrote:
That would definitely be nice, and if its true, I'd consider taking this even further: given that we're going to be processing about a line of text at most, we might be able to just eagerly lex the whole string and avoid the whole on-demand parsing business. If so, maybe we don't even need the lexer *class* and the lexing could consist of a single function like `std::vector<Token> Lex(string)` ?
https://github.com/llvm/llvm-project/pull/123521
More information about the lldb-commits
mailing list