[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)
via lldb-commits
lldb-commits at lists.llvm.org
Sun Feb 2 22:07:08 PST 2025
https://github.com/cmtice updated https://github.com/llvm/llvm-project/pull/123521
>From 468f73f8539dcb8addf8ed9618d9eb797dabbb01 Mon Sep 17 00:00:00 2001
From: Caroline Tice <cmtice at google.com>
Date: Sun, 19 Jan 2025 09:15:34 -0800
Subject: [PATCH 1/5] [LLDB] Add Lexer (with tests) for DIL (Data Inspection
Language).
This adds the basic lexer, with unittests, for the Data Inspection
Language (DIL) -- see
https://discourse.llvm.org/t/rfc-data-inspection-language/69893
This version of the lexer only handles local variables and namespaces,
and is designed to work with
https://github.com/llvm/llvm-project/pull/120971.
---
lldb/include/lldb/ValueObject/DILLexer.h | 156 ++++++++++++++
lldb/source/ValueObject/DILLexer.cpp | 205 +++++++++++++++++++
lldb/unittests/ValueObject/CMakeLists.txt | 1 +
lldb/unittests/ValueObject/DILLexerTests.cpp | 193 +++++++++++++++++
4 files changed, 555 insertions(+)
create mode 100644 lldb/include/lldb/ValueObject/DILLexer.h
create mode 100644 lldb/source/ValueObject/DILLexer.cpp
create mode 100644 lldb/unittests/ValueObject/DILLexerTests.cpp
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
new file mode 100644
index 00000000000000..45c506b2f4106d
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -0,0 +1,156 @@
+//===-- DILLexer.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <limits.h>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind { // Kinds of token the DIL lexer can hand to the parser.
+ coloncolon, // "::"
+ eof, // end of the input expression
+ identifier, // a word that is not a keyword
+ invalid, // kind given to the token returned when look-ahead fails
+ kw_namespace, // the keyword "namespace"
+ l_paren, // "("
+ none, // kind of a default-constructed token, and of a failed lex
+ r_paren, // ")"
+ unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  /// Create a token of the given kind. 'spelling' is the token's text and
+  /// 'start' is the offset of its first character within the expression.
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+      : m_kind(kind), m_spelling(std::move(spelling)), m_start_pos(start) {}
+
+  /// Create an empty token of kind 'none' located at offset 0.
+  DILToken() : m_kind(dil::TokenKind::none), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  /// Return a copy of the token's text.
+  std::string getSpelling() const { return m_spelling; }
+
+  /// Return the number of characters in the token's text.
+  uint32_t getLength() const {
+    return static_cast<uint32_t>(m_spelling.size());
+  }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+    return is(kind1) || is(kind2);
+  }
+
+  /// Return true if the token's kind equals any of the listed kinds. Recurses
+  /// down to the two-argument overload above.
+  template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+    return is(kind) || isOneOf(Ks...);
+  }
+
+  /// Return the offset of the token's first character in the expression.
+  uint32_t getLocation() const { return m_start_pos; }
+
+  /// Overwrite the kind, text and start offset of this token in one call.
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+    m_kind = kind;
+    m_spelling = std::move(spelling);
+    m_start_pos = start;
+  }
+
+  /// Return a printable name for 'kind'.
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  /// Create a lexer over its own copy of 'dil_expr'. Nothing is lexed until
+  /// Lex() or LookAhead() is called.
+  ///
+  /// NOTE: m_cur_pos is an iterator into this object's m_expr, so a
+  /// member-wise copy of a DILLexer would still point into the original
+  /// object's string. Avoid copying lexer objects.
+  DILLexer(llvm::StringRef dil_expr)
+      : m_expr(dil_expr.str()), m_cur_pos(m_expr.begin()),
+        // Use UINT_MAX to indicate invalid/uninitialized value.
+        m_tokens_idx(UINT_MAX) {}
+
+  /// Lex a token into 'result'. Unless 'look_ahead' is set, a token that has
+  /// already been lexed and follows the current one is returned instead of
+  /// lexing further. Returns false, with 'result' set to kind 'none', when no
+  /// token can be recognized at the current position.
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  /// Try to consume a word (identifier or keyword) at the current lexing
+  /// position. On success the position is advanced past the word, its size
+  /// is added to 'length' and true is returned. Otherwise false is returned
+  /// and, if any characters were consumed, the position is reset to 'start'.
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  /// Return the offset, within the expression, of the next character to lex.
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+                         std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  /// Make the token N+1 positions ahead of the 'current' token the new
+  /// 'current' token and return it.
+  const DILToken &AcceptLookAhead(uint32_t N);
+
+  /// Return the index for the 'current' token being handled by the DIL parser.
+  uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+  /// Return the current token to be handled by the DIL parser. The current
+  /// index must already refer to a lexed token; it starts out as UINT_MAX.
+  DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+  /// Update the index for the 'current' token, to point to the next lexed
+  /// token. Returns false, leaving the index untouched, when there is no
+  /// next lexed token to move to.
+  bool IncrementTokenIdx() {
+    // Check for an empty vector first: size() - 1 would wrap around and the
+    // comparison below would then let the index move past the end.
+    if (m_lexed_tokens.empty() || m_tokens_idx >= m_lexed_tokens.size() - 1)
+      return false;
+
+    m_tokens_idx++;
+    return true;
+  }
+
+  /// Set the index for the 'current' token (to be handled by the parser)
+  /// to a particular position. Used for either committing 'look ahead' parsing
+  /// or rolling back tentative parsing. Returns false, leaving the index
+  /// untouched, when 'new_value' does not refer to a lexed token.
+  bool ResetTokenIdx(uint32_t new_value) {
+    // Written without "size() - 1" so that an empty vector rejects every
+    // index instead of wrapping around and accepting all of them.
+    if (new_value >= m_lexed_tokens.size())
+      return false;
+
+    m_tokens_idx = new_value;
+    return true;
+  }
+
+private:
+  // The input string we are lexing & parsing.
+  std::string m_expr;
+
+  // The current position of the lexer within m_expr (the character position,
+  // within the string, of the next item to be lexed).
+  std::string::iterator m_cur_pos;
+
+  // Holds all of the tokens lexed so far.
+  std::vector<DILToken> m_lexed_tokens;
+
+  // Index into m_lexed_tokens; indicates which token the DIL parser is
+  // currently trying to parse/handle.
+  uint32_t m_tokens_idx;
+
+  // "invalid" token; to be returned by lexer when 'look ahead' fails.
+  DILToken m_invalid_token;
+};
+
+} // namespace dil
+
+} // namespace lldb_private
+
+#endif // LLDB_VALUEOBJECT_DILLEXER_H_
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
new file mode 100644
index 00000000000000..4c2b0b1813bb96
--- /dev/null
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -0,0 +1,205 @@
+//===-- DILLexer.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the lexer for the Data Inspection Language (DIL), and its
+// helper functions, which will eventually underlie the 'frame variable'
+// command. The language that this lexer recognizes is described in
+// lldb/docs/dil-expr-lang.ebnf
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+// Maps each keyword's spelling to its token kind, for fast keyword lookup. More keywords will be added later.
+const llvm::StringMap<dil::TokenKind> Keywords = {
+ {"namespace", dil::TokenKind::kw_namespace},
+};
+
+// Return a printable name for 'kind'. Every enumerator has its own case so
+// that the compiler can flag a kind added later without a name.
+const std::string DILToken::getTokenName(dil::TokenKind kind) {
+  switch (kind) {
+  case dil::TokenKind::coloncolon:
+    return "coloncolon";
+  case dil::TokenKind::eof:
+    return "eof";
+  case dil::TokenKind::identifier:
+    return "identifier";
+  case dil::TokenKind::invalid:
+    return "invalid";
+  case dil::TokenKind::kw_namespace:
+    return "namespace";
+  case dil::TokenKind::l_paren:
+    return "l_paren";
+  case dil::TokenKind::none:
+    return "none";
+  case dil::TokenKind::r_paren:
+    return "r_paren";
+  case dil::TokenKind::unknown:
+    return "unknown";
+  }
+  // Only reached for a value that is not one of the enumerators above.
+  return "token_name";
+}
+
+// Returns true if 'c' is an ASCII letter ('a'..'z' or 'A'..'Z').
+static bool Is_Letter(char c) {
+  const bool is_lower = 'a' <= c && c <= 'z';
+  const bool is_upper = 'A' <= c && c <= 'Z';
+  return is_lower || is_upper;
+}
+
+// Returns true if 'c' is an ASCII decimal digit ('0'..'9').
+static bool Is_Digit(char c) { return c >= '0' && c <= '9'; }
+
+// A word starts with a letter, underscore, or dollar sign, followed by
+// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
+bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) {
+  // Nothing left to lex, or a leading digit: this cannot be a word.
+  if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos))
+    return false;
+
+  // A leading '$' is allowed, for a register name or convenience variable.
+  const bool has_dollar_prefix = (*m_cur_pos == '$');
+  if (has_dollar_prefix) {
+    ++m_cur_pos;
+    length++;
+  }
+
+  // Consume the following run of letters, digits and underscores.
+  while (m_cur_pos != m_expr.end()) {
+    const char c = *m_cur_pos;
+    if (!Is_Letter(c) && !Is_Digit(c) && c != '_')
+      break;
+    length++;
+    ++m_cur_pos;
+  }
+
+  // A '$' on its own is not a word, so a '$'-prefixed word needs at least
+  // one more character; any other word needs at least one character.
+  const uint32_t min_length = has_dollar_prefix ? 2 : 1;
+  if (length >= min_length)
+    return true;
+
+  // Not a valid word, so put the lexing position back where the caller said.
+  m_cur_pos = start;
+  return false;
+}
+
+// Fill 'result' with the given kind, text and position, then append a copy
+// of it to the vector of lexed tokens.
+void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+                                 std::string tok_str, uint32_t tok_pos) {
+  result.setValues(tok_kind, tok_str, tok_pos);
+  m_lexed_tokens.push_back(result);
+}
+
+// Lex one token into 'result'. Returns true on success; returns false, with
+// 'result' set to an empty token of kind 'none', if the text at the current
+// position is not a recognized token.
+bool DILLexer::Lex(DILToken &result, bool look_ahead) {
+  // For an ordinary 'next token' request (not part of a LookAhead), hand back
+  // the token after the current one if it has already been lexed and pushed
+  // onto the tokens vector, rather than doing more lexing.
+  const bool next_already_lexed = !look_ahead && m_tokens_idx != UINT_MAX &&
+                                  m_tokens_idx < m_lexed_tokens.size() - 1;
+  if (next_already_lexed) {
+    result = m_lexed_tokens[m_tokens_idx + 1];
+    return true;
+  }
+
+  // Skip over whitespace (spaces).
+  while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
+    ++m_cur_pos;
+
+  // At the end of the input string: produce an eof token.
+  if (m_cur_pos == m_expr.end()) {
+    UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length());
+    return true;
+  }
+
+  const uint32_t position = m_cur_pos - m_expr.begin();
+
+  // Words are either keywords or identifiers.
+  uint32_t length = 0;
+  if (Is_Word(m_cur_pos, length)) {
+    std::string word = m_expr.substr(position, length);
+    auto iter = Keywords.find(word);
+    const dil::TokenKind kind =
+        iter != Keywords.end() ? iter->second : dil::TokenKind::identifier;
+    UpdateLexedTokens(result, kind, word, position);
+    return true;
+  }
+
+  // Punctuation: '(', ')' and '::'.
+  const char c = *m_cur_pos;
+  if (c == '(') {
+    ++m_cur_pos;
+    UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position);
+    return true;
+  }
+  if (c == ')') {
+    ++m_cur_pos;
+    UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position);
+    return true;
+  }
+  if (c == ':' && position + 1 < m_expr.size() &&
+      m_expr[position + 1] == ':') {
+    m_cur_pos += 2;
+    UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position);
+    return true;
+  }
+
+  // Empty Token
+  result.setValues(dil::TokenKind::none, "", m_expr.length());
+  return false;
+}
+
+// Return the token N+1 positions ahead of the 'current' token, lexing more
+// of the input if that token has not been lexed yet. Returns the 'invalid'
+// token if the input ends, or cannot be lexed, before that position.
+const DILToken &DILLexer::LookAhead(uint32_t N) {
+  // Computed in uint32_t on purpose: while the current index is still
+  // UINT_MAX (nothing accepted yet) the sum wraps around to N, i.e. the
+  // (N+1)'th token from the start of the input.
+  const uint32_t target_idx = m_tokens_idx + N + 1;
+
+  while (target_idx >= m_lexed_tokens.size()) {
+    DILToken tok;
+    // A failed Lex() pushes nothing onto m_lexed_tokens, so going on to index
+    // the vector would read out of bounds; report failure instead. Running
+    // into eof before reaching the requested position is a failure too.
+    const bool lexed = Lex(tok, /*look_ahead=*/true);
+    const bool ran_out = tok.getKind() == dil::TokenKind::eof &&
+                         target_idx >= m_lexed_tokens.size();
+    if (!lexed || ran_out) {
+      m_invalid_token.setValues(dil::TokenKind::invalid, "", 0);
+      return m_invalid_token;
+    }
+  }
+
+  return m_lexed_tokens[target_idx];
+}
+
+// Make the token N+1 positions ahead of the 'current' token the new 'current'
+// token and return it. If no such token has been lexed, the current index is
+// left unchanged and the 'invalid' token is returned.
+const DILToken &DILLexer::AcceptLookAhead(uint32_t N) {
+  const uint32_t target_idx = m_tokens_idx + N + 1;
+
+  // Valid indices are 0 .. size() - 1, so an index equal to size() must be
+  // rejected as well.
+  if (target_idx >= m_lexed_tokens.size()) {
+    m_invalid_token.setValues(dil::TokenKind::invalid, "", 0);
+    return m_invalid_token;
+  }
+
+  m_tokens_idx = target_idx;
+  return m_lexed_tokens[m_tokens_idx];
+}
+
+} // namespace dil
+
+} // namespace lldb_private
diff --git a/lldb/unittests/ValueObject/CMakeLists.txt b/lldb/unittests/ValueObject/CMakeLists.txt
index 8fcc8d62a79979..952f5411a98057 100644
--- a/lldb/unittests/ValueObject/CMakeLists.txt
+++ b/lldb/unittests/ValueObject/CMakeLists.txt
@@ -1,5 +1,6 @@
add_lldb_unittest(LLDBValueObjectTests
DumpValueObjectOptionsTests.cpp
+ DILLexerTests.cpp
LINK_LIBS
lldbValueObject
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp
new file mode 100644
index 00000000000000..ec6ff86b64d36b
--- /dev/null
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -0,0 +1,193 @@
+//===-- DILLexerTests.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "llvm/ADT/StringRef.h"
+#include "gtest/gtest.h"
+#include <string>
+
+using llvm::StringRef;
+
+TEST(DILLexerTests, SimpleTest) { // A lone identifier lexes to one identifier token, then eof.
+ StringRef dil_input_expr("simple_var");
+ uint32_t tok_len = 10; // number of characters in "simple_var"
+ lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+ lldb_private::dil::DILToken dil_token;
+ dil_token.setKind(lldb_private::dil::TokenKind::unknown); // placeholder kind, so a successful Lex() visibly changes it
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::unknown);
+ dil_lexer.Lex(dil_token);
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ EXPECT_EQ(dil_token.getSpelling(), "simple_var");
+ EXPECT_EQ(dil_token.getLength(), tok_len);
+ dil_lexer.Lex(dil_token); // the whole input is consumed, so this lexes eof
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+}
+
+TEST(DILLexerTests, TokenKindTest) { // Exercises the token kind accessors and predicates on a keyword token.
+ StringRef dil_input_expr("namespace");
+ lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+ lldb_private::dil::DILToken dil_token;
+ dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+
+ dil_lexer.Lex(dil_token);
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); // Lex() does not move the 'current' token index
+ dil_lexer.ResetTokenIdx(0); // make the token just lexed the 'current' token
+
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::kw_namespace);
+ EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier));
+ EXPECT_FALSE(dil_token.is(lldb_private::dil::TokenKind::l_paren));
+ EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::eof,
+ lldb_private::dil::TokenKind::kw_namespace)); // two-argument overload
+ EXPECT_FALSE(dil_token.isOneOf(lldb_private::dil::TokenKind::l_paren,
+ lldb_private::dil::TokenKind::r_paren,
+ lldb_private::dil::TokenKind::coloncolon,
+ lldb_private::dil::TokenKind::eof)); // variadic overload
+
+ dil_token.setKind(lldb_private::dil::TokenKind::identifier);
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+}
+
+TEST(DILLexerTests, LookAheadTest) { // Looks ahead over several tokens, accepts the look-ahead, then lexes on to eof.
+ StringRef dil_input_expr("(anonymous namespace)::some_var");
+ lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+ lldb_private::dil::DILToken dil_token;
+ dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+ uint32_t expect_loc = 23;
+
+ dil_lexer.Lex(dil_token);
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX);
+ dil_lexer.ResetTokenIdx(0);
+
+ // Current token is '('; check the next 4 tokens, to make
+ // sure they are the identifier 'anonymous', the namespace keyword,
+ // ')' and '::', in that order.
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::l_paren);
+ EXPECT_EQ(dil_lexer.LookAhead(0).getKind(),
+ lldb_private::dil::TokenKind::identifier);
+ EXPECT_EQ(dil_lexer.LookAhead(0).getSpelling(), "anonymous");
+ EXPECT_EQ(dil_lexer.LookAhead(1).getKind(),
+ lldb_private::dil::TokenKind::kw_namespace);
+ EXPECT_EQ(dil_lexer.LookAhead(2).getKind(),
+ lldb_private::dil::TokenKind::r_paren);
+ EXPECT_EQ(dil_lexer.LookAhead(3).getKind(),
+ lldb_private::dil::TokenKind::coloncolon);
+ // Verify we've advanced our position counter (lexing location) in the
+ // input 23 characters (the length of '(anonymous namespace)::').
+ EXPECT_EQ(dil_lexer.GetLocation(), expect_loc);
+
+ // Our current index should still be 0, as we only looked ahead; we are still
+ // officially on the '('.
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 0);
+
+ // Accept the 'lookahead', so our current token is '::', which has the index
+ // 4 in our vector of tokens (which starts at zero).
+ dil_token = dil_lexer.AcceptLookAhead(3);
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::coloncolon);
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 4);
+
+ // Lex the final variable name in the input string
+ dil_lexer.Lex(dil_token);
+ dil_lexer.IncrementTokenIdx();
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ EXPECT_EQ(dil_token.getSpelling(), "some_var");
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 5);
+
+ dil_lexer.Lex(dil_token);
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+}
+
+TEST(DILLexerTests, MultiTokenLexTest) { // Lexes five space-separated identifiers one by one, then eof.
+ StringRef dil_input_expr("This string has several identifiers");
+ lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+ lldb_private::dil::DILToken dil_token;
+ dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+
+ dil_lexer.Lex(dil_token);
+ EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX);
+ dil_lexer.ResetTokenIdx(0); // make the first lexed token the 'current' token
+
+ EXPECT_EQ(dil_token.getSpelling(), "This");
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ dil_lexer.Lex(dil_token); // lex the next token ...
+ dil_lexer.IncrementTokenIdx(); // ... and make it the 'current' token
+
+ EXPECT_EQ(dil_token.getSpelling(), "string");
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ dil_lexer.Lex(dil_token);
+ dil_lexer.IncrementTokenIdx();
+
+ EXPECT_EQ(dil_token.getSpelling(), "has");
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ dil_lexer.Lex(dil_token);
+ dil_lexer.IncrementTokenIdx();
+
+ EXPECT_EQ(dil_token.getSpelling(), "several");
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ dil_lexer.Lex(dil_token);
+ dil_lexer.IncrementTokenIdx();
+
+ EXPECT_EQ(dil_token.getSpelling(), "identifiers");
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ dil_lexer.Lex(dil_token);
+ dil_lexer.IncrementTokenIdx();
+
+ EXPECT_EQ(dil_token.getSpelling(), ""); // the eof token has an empty spelling
+ EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+}
+
+// Checks which inputs do, and do not, start with an identifier token. Only
+// the first token of each input is examined.
+TEST(DILLexerTests, IdentifiersTest) {
+  // Note: for "ab cd" only the leading word "ab" is lexed and checked.
+  std::vector<std::string> valid_identifiers = {
+      "$My_name1", "$pc", "abcd", "ab cd", "_",      "_a",
+      "_a_",       "a_b", "this", "self",  "a",      "MyName"};
+  // Note: "namespace" is a valid word, but it lexes as a keyword rather than
+  // as an identifier.
+  std::vector<std::string> invalid_identifiers = {
+      "234", "2a", "2", "$", "1MyName", "", "namespace"};
+
+  // Verify that all of the valid identifiers come out as identifier tokens.
+  // Iterate by const reference so the strings are not copied on every pass.
+  for (const auto &str : valid_identifiers) {
+    StringRef dil_input_expr(str);
+    lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+    lldb_private::dil::DILToken dil_token;
+    dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+
+    dil_lexer.Lex(dil_token);
+    EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+  }
+
+  // Verify that none of the invalid identifiers come out as identifier tokens.
+  for (const auto &str : invalid_identifiers) {
+    StringRef dil_input_expr(str);
+    lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
+    lldb_private::dil::DILToken dil_token;
+    dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+
+    dil_lexer.Lex(dil_token);
+    EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier));
+    EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::unknown,
+                                  lldb_private::dil::TokenKind::none,
+                                  lldb_private::dil::TokenKind::eof,
+                                  lldb_private::dil::TokenKind::kw_namespace));
+  }
+}
>From 61a2607a70d90688d395321e846a3be58ccbebcb Mon Sep 17 00:00:00 2001
From: Caroline Tice <cmtice at google.com>
Date: Sun, 19 Jan 2025 09:22:51 -0800
Subject: [PATCH 2/5] [LLDB] Add Lexer (with tests) for DIL (Data Inspection
Language)
Update CMakeLists.txt to build DILLexer.cpp.
---
lldb/source/ValueObject/CMakeLists.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/lldb/source/ValueObject/CMakeLists.txt b/lldb/source/ValueObject/CMakeLists.txt
index 70cb3d6d53f071..30c34472289e7b 100644
--- a/lldb/source/ValueObject/CMakeLists.txt
+++ b/lldb/source/ValueObject/CMakeLists.txt
@@ -1,4 +1,5 @@
add_lldb_library(lldbValueObject
+ DILLexer.cpp
ValueObject.cpp
ValueObjectCast.cpp
ValueObjectChild.cpp
>From 5e2ee55f800726910ad6e56a192554375f61bfb8 Mon Sep 17 00:00:00 2001
From: Caroline Tice <cmtice at google.com>
Date: Sat, 25 Jan 2025 16:56:30 -0800
Subject: [PATCH 3/5] Many changes, to address all the review comments:
- Remove "DIL" prefix from DILTokenKind and DILToken.
- Change the token kind from an enum class to an enum inside the Token
class.
- Use CamelCase for all the method names.
- Replace Token::SetValues method with assignments.
- Use a StringRef, not std::string, to hold the input string in the lexer.
- Update the lexer to lex all the tokens at one time. Added two new methods
for this: LexAll and GetNextToken.
- Made some of the Lexer methods private.
- Replaced StringMap with StringSwitch for fast keyword lookups.
- Updated GetTokenName to directly return StringRefs; removed default case from
switch statement.
- Cleaned up code format in IsLetter & IsDigit.
- Updated IsWord to return an iterator range containing the word (if any).
- Updated Lex function (now called by LexAll) to return an llvm::Expected
token; removed look_ahead checks; changed the operator lexing to use
a vector of operators (as suggested).
- Cleaned up LookAhead method, now that we know all tokens have already been
lexed.
- Added helper function to unittests, to help check a sequence of tokens.
- Generally cleaned up the tests to deal with all the code changes.
---
lldb/include/lldb/ValueObject/DILLexer.h | 110 ++++----
lldb/source/ValueObject/DILLexer.cpp | 214 +++++++--------
lldb/unittests/ValueObject/DILLexerTests.cpp | 269 ++++++++++---------
3 files changed, 302 insertions(+), 291 deletions(-)
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 45c506b2f4106d..61e5fe622e51e6 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -10,6 +10,8 @@
#define LLDB_VALUEOBJECT_DILLEXER_H_
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Error.h"
#include <cstdint>
#include <limits.h>
#include <memory>
@@ -20,58 +22,51 @@ namespace lldb_private {
namespace dil {
-enum class TokenKind {
- coloncolon,
- eof,
- identifier,
- invalid,
- kw_namespace,
- l_paren,
- none,
- r_paren,
- unknown,
-};
-
/// Class defining the tokens generated by the DIL lexer and used by the
/// DIL parser.
-class DILToken {
+class Token {
public:
- DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+ enum Kind {
+ coloncolon,
+ eof,
+ identifier,
+ invalid,
+ kw_namespace,
+ l_paren,
+ none,
+ r_paren,
+ unknown,
+ };
+
+ Token(Kind kind, std::string spelling, uint32_t start)
: m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
- DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+ Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {}
- void setKind(dil::TokenKind kind) { m_kind = kind; }
- dil::TokenKind getKind() const { return m_kind; }
+ void SetKind(Kind kind) { m_kind = kind; }
- std::string getSpelling() const { return m_spelling; }
+ Kind GetKind() const { return m_kind; }
- uint32_t getLength() const { return m_spelling.size(); }
+ std::string GetSpelling() const { return m_spelling; }
- bool is(dil::TokenKind kind) const { return m_kind == kind; }
+ uint32_t GetLength() const { return m_spelling.size(); }
- bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+ bool Is(Kind kind) const { return m_kind == kind; }
- bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
- return is(kind1) || is(kind2);
- }
+ bool IsNot(Kind kind) const { return m_kind != kind; }
- template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
- return is(kind) || isOneOf(Ks...);
- }
+ bool IsOneOf(Kind kind1, Kind kind2) const { return Is(kind1) || Is(kind2); }
- uint32_t getLocation() const { return m_start_pos; }
-
- void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
- m_kind = kind;
- m_spelling = spelling;
- m_start_pos = start;
+ template <typename... Ts> bool IsOneOf(Kind kind, Ts... Ks) const {
+ return Is(kind) || IsOneOf(Ks...);
}
- static const std::string getTokenName(dil::TokenKind kind);
+ uint32_t GetLocation() const { return m_start_pos; }
+
+ static llvm::StringRef GetTokenName(Kind kind);
private:
- dil::TokenKind m_kind;
+ Kind m_kind;
std::string m_spelling;
uint32_t m_start_pos; // within entire expression string
};
@@ -79,35 +74,30 @@ class DILToken {
/// Class for doing the simple lexing required by DIL.
class DILLexer {
public:
- DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+ DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) {
m_cur_pos = m_expr.begin();
// Use UINT_MAX to indicate invalid/uninitialized value.
m_tokens_idx = UINT_MAX;
+ m_invalid_token = Token(Token::invalid, "", 0);
}
- bool Lex(DILToken &result, bool look_ahead = false);
-
- bool Is_Word(std::string::iterator start, uint32_t &length);
-
- uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
-
- /// Update 'result' with the other paremeter values, create a
- /// duplicate token, and push the duplicate token onto the vector of
- /// lexed tokens.
- void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
- std::string tok_str, uint32_t tok_pos);
+ llvm::Expected<bool> LexAll();
/// Return the lexed token N+1 positions ahead of the 'current' token
/// being handled by the DIL parser.
- const DILToken &LookAhead(uint32_t N);
+ const Token &LookAhead(uint32_t N);
+
+ const Token &AcceptLookAhead(uint32_t N);
- const DILToken &AcceptLookAhead(uint32_t N);
+ const Token &GetNextToken();
/// Return the index for the 'current' token being handled by the DIL parser.
uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
/// Return the current token to be handled by the DIL parser.
- DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+ const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+ uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
/// Update the index for the 'current' token, to point to the next lexed
/// token.
@@ -130,23 +120,35 @@ class DILLexer {
return true;
}
+ uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
private:
+ llvm::Expected<Token> Lex();
+
+ llvm::iterator_range<llvm::StringRef::iterator> IsWord();
+
+ /// Update 'result' with the other parameter values, create a
+ /// duplicate token, and push the duplicate token onto the vector of
+ /// lexed tokens.
+ void UpdateLexedTokens(Token &result, Token::Kind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
// The input string we are lexing & parsing.
- std::string m_expr;
+ llvm::StringRef m_expr;
// The current position of the lexer within m_expr (the character position,
// within the string, of the next item to be lexed).
- std::string::iterator m_cur_pos;
+ llvm::StringRef::iterator m_cur_pos;
// Holds all of the tokens lexed so far.
- std::vector<DILToken> m_lexed_tokens;
+ std::vector<Token> m_lexed_tokens;
// Index into m_lexed_tokens; indicates which token the DIL parser is
// currently trying to parse/handle.
uint32_t m_tokens_idx;
// "invalid" token; to be returned by lexer when 'look ahead' fails.
- DILToken m_invalid_token;
+ Token m_invalid_token;
};
} // namespace dil
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index 4c2b0b1813bb96..30e4bcb04e6505 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -12,107 +12,99 @@
//===----------------------------------------------------------------------===//
#include "lldb/ValueObject/DILLexer.h"
-#include "llvm/ADT/StringMap.h"
+#include "lldb/Utility/Status.h"
+#include "llvm/ADT/StringSwitch.h"
namespace lldb_private {
namespace dil {
-// For fast keyword lookup. More keywords will be added later.
-const llvm::StringMap<dil::TokenKind> Keywords = {
- {"namespace", dil::TokenKind::kw_namespace},
-};
-
-const std::string DILToken::getTokenName(dil::TokenKind kind) {
+llvm::StringRef Token::GetTokenName(Kind kind) {
switch (kind) {
- case dil::TokenKind::coloncolon:
+ case Kind::coloncolon:
return "coloncolon";
- case dil::TokenKind::eof:
+ case Kind::eof:
return "eof";
- case dil::TokenKind::identifier:
+ case Kind::identifier:
return "identifier";
- case dil::TokenKind::kw_namespace:
+ case Kind::invalid:
+ return "invalid";
+ case Kind::kw_namespace:
return "namespace";
- case dil::TokenKind::l_paren:
+ case Kind::l_paren:
return "l_paren";
- case dil::TokenKind::r_paren:
+ case Kind::none:
+ return "none";
+ case Kind::r_paren:
return "r_paren";
- case dil::TokenKind::unknown:
+ case Kind::unknown:
return "unknown";
- default:
- return "token_name";
}
}
-static bool Is_Letter(char c) {
- if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
- return true;
- return false;
+static bool IsLetter(char c) {
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}
-static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); }
+static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
// A word starts with a letter, underscore, or dollar sign, followed by
// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
-bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) {
- bool done = false;
+llvm::iterator_range<llvm::StringRef::iterator> DILLexer::IsWord() {
+ llvm::StringRef::iterator start = m_cur_pos;
bool dollar_start = false;
// Must not start with a digit.
- if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos))
- return false;
+ if (m_cur_pos == m_expr.end() || IsDigit(*m_cur_pos))
+ return llvm::make_range(m_cur_pos, m_cur_pos);
// First character *may* be a '$', for a register name or convenience
// variable.
if (*m_cur_pos == '$') {
dollar_start = true;
++m_cur_pos;
- length++;
}
// Contains only letters, digits or underscores
- for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) {
+ for (; m_cur_pos != m_expr.end(); ++m_cur_pos) {
char c = *m_cur_pos;
- if (!Is_Letter(c) && !Is_Digit(c) && c != '_') {
- done = true;
+ if (!IsLetter(c) && !IsDigit(c) && c != '_')
break;
- } else
- length++;
}
- if (dollar_start && length > 1) // Must have something besides just '$'
- return true;
-
- if (!dollar_start && length > 0)
- return true;
+ // If first char is '$', make sure there's at least one more char, or it's
+ // invalid.
+ if (dollar_start && (m_cur_pos - start <= 1)) {
+ m_cur_pos = start;
+ return llvm::make_range(start, start); // Empty range
+ }
- // Not a valid word, so re-set the lexing position.
- m_cur_pos = start;
- return false;
+ return llvm::make_range(start, m_cur_pos);
}
-void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+void DILLexer::UpdateLexedTokens(Token &result, Token::Kind tok_kind,
std::string tok_str, uint32_t tok_pos) {
- DILToken new_token;
- result.setValues(tok_kind, tok_str, tok_pos);
- new_token = result;
+ Token new_token(tok_kind, tok_str, tok_pos);
+ result = new_token;
m_lexed_tokens.push_back(std::move(new_token));
}
-bool DILLexer::Lex(DILToken &result, bool look_ahead) {
- bool retval = true;
-
- if (!look_ahead) {
- // We're being asked for the 'next' token, and not a part of a LookAhead.
- // Check to see if we've already lexed it and pushed it onto our tokens
- // vector; if so, return the next token from the vector, rather than doing
- // more lexing.
- if ((m_tokens_idx != UINT_MAX) &&
- (m_tokens_idx < m_lexed_tokens.size() - 1)) {
- result = m_lexed_tokens[m_tokens_idx + 1];
- return retval;
+llvm::Expected<bool> DILLexer::LexAll() {
+ bool done = false;
+ while (!done) {
+ auto tok_or_err = Lex();
+ if (!tok_or_err)
+ return tok_or_err.takeError();
+ Token token = *tok_or_err;
+ if (token.GetKind() == Token::eof) {
+ done = true;
}
}
+ return true;
+}
+
+llvm::Expected<Token> DILLexer::Lex() {
+ Token result;
// Skip over whitespace (spaces).
while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
@@ -120,79 +112,52 @@ bool DILLexer::Lex(DILToken &result, bool look_ahead) {
// Check to see if we've reached the end of our input string.
if (m_cur_pos == m_expr.end()) {
- UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length());
- return retval;
+ UpdateLexedTokens(result, Token::eof, "", (uint32_t)m_expr.size());
+ return result;
}
uint32_t position = m_cur_pos - m_expr.begin();
- ;
- std::string::iterator start = m_cur_pos;
- uint32_t length = 0;
- if (Is_Word(start, length)) {
- dil::TokenKind kind;
- std::string word = m_expr.substr(position, length);
- auto iter = Keywords.find(word);
- if (iter != Keywords.end())
- kind = iter->second;
- else
- kind = dil::TokenKind::identifier;
-
- UpdateLexedTokens(result, kind, word, position);
- return true;
+ llvm::StringRef::iterator start = m_cur_pos;
+ llvm::iterator_range<llvm::StringRef::iterator> word_range = IsWord();
+ if (!word_range.empty()) {
+ uint32_t length = word_range.end() - word_range.begin();
+ llvm::StringRef word(m_expr.substr(position, length));
+ // We will be adding more keywords here in the future...
+ Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
+ .Case("namespace", Token::kw_namespace)
+ .Default(Token::identifier);
+ UpdateLexedTokens(result, kind, word.str(), position);
+ return result;
}
- switch (*m_cur_pos) {
- case '(':
- m_cur_pos++;
- UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position);
- return true;
- case ')':
- m_cur_pos++;
- UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position);
- return true;
- case ':':
- if (position + 1 < m_expr.size() && m_expr[position + 1] == ':') {
- m_cur_pos += 2;
- UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position);
- return true;
+ m_cur_pos = start;
+ llvm::StringRef remainder(m_expr.substr(position, m_expr.end() - m_cur_pos));
+ std::vector<std::pair<Token::Kind, const char *>> operators = {
+ {Token::l_paren, "("},
+ {Token::r_paren, ")"},
+ {Token::coloncolon, "::"},
+ };
+ for (auto [kind, str] : operators) {
+ if (remainder.consume_front(str)) {
+ m_cur_pos += strlen(str);
+ UpdateLexedTokens(result, kind, str, position);
+ return result;
}
- break;
- default:
- break;
}
- // Empty Token
- result.setValues(dil::TokenKind::none, "", m_expr.length());
- return false;
-}
-const DILToken &DILLexer::LookAhead(uint32_t N) {
- uint32_t extra_lexed_tokens = m_lexed_tokens.size() - m_tokens_idx - 1;
+ // Unrecognized character(s) in string; unable to lex it.
+ Status error("Unable to lex input string");
+ return error.ToError();
+}
- if (N + 1 < extra_lexed_tokens)
+const Token &DILLexer::LookAhead(uint32_t N) {
+ if (m_tokens_idx + N + 1 < m_lexed_tokens.size())
return m_lexed_tokens[m_tokens_idx + N + 1];
- uint32_t remaining_tokens =
- (m_tokens_idx + N + 1) - m_lexed_tokens.size() + 1;
-
- bool done = false;
- bool look_ahead = true;
- while (!done && remaining_tokens > 0) {
- DILToken tok;
- Lex(tok, look_ahead);
- if (tok.getKind() == dil::TokenKind::eof)
- done = true;
- remaining_tokens--;
- };
-
- if (remaining_tokens > 0) {
- m_invalid_token.setValues(dil::TokenKind::invalid, "", 0);
- return m_invalid_token;
- }
-
- return m_lexed_tokens[m_tokens_idx + N + 1];
+ return m_invalid_token;
}
-const DILToken &DILLexer::AcceptLookAhead(uint32_t N) {
+const Token &DILLexer::AcceptLookAhead(uint32_t N) {
if (m_tokens_idx + N + 1 > m_lexed_tokens.size())
return m_invalid_token;
@@ -200,6 +165,25 @@ const DILToken &DILLexer::AcceptLookAhead(uint32_t N) {
return m_lexed_tokens[m_tokens_idx];
}
+const Token &DILLexer::GetNextToken() {
+ if (m_tokens_idx == UINT_MAX)
+ m_tokens_idx = 0;
+ else
+ m_tokens_idx++;
+
+ // Return the next token in the vector of lexed tokens.
+ if (m_tokens_idx < m_lexed_tokens.size())
+ return m_lexed_tokens[m_tokens_idx];
+
+ // We're already at/beyond the end of our lexed tokens. If the last token
+ // is an eof token, return it.
+ if (m_lexed_tokens[m_lexed_tokens.size() - 1].GetKind() == Token::eof)
+ return m_lexed_tokens[m_lexed_tokens.size() - 1];
+
+ // Return the invalid token.
+ return m_invalid_token;
+}
+
} // namespace dil
} // namespace lldb_private
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp
index ec6ff86b64d36b..137013e40d6adf 100644
--- a/lldb/unittests/ValueObject/DILLexerTests.cpp
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -13,131 +13,145 @@
using llvm::StringRef;
+bool VerifyExpectedTokens(
+ lldb_private::dil::DILLexer &lexer,
+ std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
+ exp_tokens,
+ uint32_t start_pos) {
+ if (lexer.NumLexedTokens() - start_pos < exp_tokens.size())
+ return false;
+
+ if (start_pos > 0)
+ lexer.ResetTokenIdx(start_pos -
+ 1); // GetNextToken increments the idx first.
+ for (const auto &pair : exp_tokens) {
+ lldb_private::dil::Token token = lexer.GetNextToken();
+ if (token.GetKind() != pair.first || token.GetSpelling() != pair.second)
+ return false;
+ }
+
+ return true;
+}
+
TEST(DILLexerTests, SimpleTest) {
- StringRef dil_input_expr("simple_var");
+ StringRef input_expr("simple_var");
uint32_t tok_len = 10;
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::unknown);
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- EXPECT_EQ(dil_token.getSpelling(), "simple_var");
- EXPECT_EQ(dil_token.getLength(), tok_len);
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+ lldb_private::dil::DILLexer lexer(input_expr);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown);
+ auto success = lexer.LexAll();
+
+ if (!success) {
+ EXPECT_TRUE(false);
+ }
+ token = lexer.GetNextToken();
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetSpelling(), "simple_var");
+ EXPECT_EQ(token.GetLength(), tok_len);
+ token = lexer.GetNextToken();
+ ;
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, TokenKindTest) {
- StringRef dil_input_expr("namespace");
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
-
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX);
- dil_lexer.ResetTokenIdx(0);
-
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::kw_namespace);
- EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier));
- EXPECT_FALSE(dil_token.is(lldb_private::dil::TokenKind::l_paren));
- EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::eof,
- lldb_private::dil::TokenKind::kw_namespace));
- EXPECT_FALSE(dil_token.isOneOf(lldb_private::dil::TokenKind::l_paren,
- lldb_private::dil::TokenKind::r_paren,
- lldb_private::dil::TokenKind::coloncolon,
- lldb_private::dil::TokenKind::eof));
-
- dil_token.setKind(lldb_private::dil::TokenKind::identifier);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ StringRef input_expr("namespace");
+ lldb_private::dil::DILLexer lexer(input_expr);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
+
+ auto success = lexer.LexAll();
+ if (!success) {
+ EXPECT_TRUE(false);
+ }
+ token = lexer.GetNextToken();
+
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::kw_namespace);
+ EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier));
+ EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren));
+ EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof,
+ lldb_private::dil::Token::kw_namespace));
+ EXPECT_FALSE(token.IsOneOf(
+ lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren,
+ lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof));
+
+ token.SetKind(lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
}
TEST(DILLexerTests, LookAheadTest) {
- StringRef dil_input_expr("(anonymous namespace)::some_var");
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
+ StringRef input_expr("(anonymous namespace)::some_var");
+ lldb_private::dil::DILLexer lexer(input_expr);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
uint32_t expect_loc = 23;
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX);
- dil_lexer.ResetTokenIdx(0);
+ auto success = lexer.LexAll();
+ if (!success) {
+ EXPECT_TRUE(false);
+ }
+ token = lexer.GetNextToken();
// Current token is '('; check the next 4 tokens, to make
// sure they are the identifier 'anonymous', the namespace keyword,
// ')' and '::', in that order.
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::l_paren);
- EXPECT_EQ(dil_lexer.LookAhead(0).getKind(),
- lldb_private::dil::TokenKind::identifier);
- EXPECT_EQ(dil_lexer.LookAhead(0).getSpelling(), "anonymous");
- EXPECT_EQ(dil_lexer.LookAhead(1).getKind(),
- lldb_private::dil::TokenKind::kw_namespace);
- EXPECT_EQ(dil_lexer.LookAhead(2).getKind(),
- lldb_private::dil::TokenKind::r_paren);
- EXPECT_EQ(dil_lexer.LookAhead(3).getKind(),
- lldb_private::dil::TokenKind::coloncolon);
- // Verify we've advanced our position counter (lexing location) in the
- // input 23 characters (the length of '(anonymous namespace)::'.
- EXPECT_EQ(dil_lexer.GetLocation(), expect_loc);
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren);
+ EXPECT_EQ(lexer.LookAhead(0).GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(lexer.LookAhead(0).GetSpelling(), "anonymous");
+ EXPECT_EQ(lexer.LookAhead(1).GetKind(),
+ lldb_private::dil::Token::kw_namespace);
+ EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::r_paren);
+ EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::coloncolon);
// Our current index should still be 0, as we only looked ahead; we are still
// officially on the '('.
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 0);
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)0);
// Accept the 'lookahead', so our current token is '::', which has the index
// 4 in our vector of tokens (which starts at zero).
- dil_token = dil_lexer.AcceptLookAhead(3);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::coloncolon);
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 4);
-
- // Lex the final variable name in the input string
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- EXPECT_EQ(dil_token.getSpelling(), "some_var");
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 5);
-
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+ token = lexer.AcceptLookAhead(3);
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon);
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4);
+
+ token = lexer.GetNextToken();
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetSpelling(), "some_var");
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5);
+ // Verify we've advanced our position counter (lexing location) in the
+ // input 23 characters (the length of '(anonymous namespace)::'.
+ EXPECT_EQ(token.GetLocation(), expect_loc);
+ token = lexer.GetNextToken();
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, MultiTokenLexTest) {
- StringRef dil_input_expr("This string has several identifiers");
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
-
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX);
- dil_lexer.ResetTokenIdx(0);
-
- EXPECT_EQ(dil_token.getSpelling(), "This");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
-
- EXPECT_EQ(dil_token.getSpelling(), "string");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
-
- EXPECT_EQ(dil_token.getSpelling(), "has");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
-
- EXPECT_EQ(dil_token.getSpelling(), "several");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
-
- EXPECT_EQ(dil_token.getSpelling(), "identifiers");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
- dil_lexer.Lex(dil_token);
- dil_lexer.IncrementTokenIdx();
-
- EXPECT_EQ(dil_token.getSpelling(), "");
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof);
+ StringRef input_expr("This string has (several ) ::identifiers");
+ lldb_private::dil::DILLexer lexer(input_expr);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
+
+ auto success = lexer.LexAll();
+ if (!success) {
+ EXPECT_TRUE(false);
+ }
+
+ std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
+ expected_tokens = {
+ {lldb_private::dil::Token::identifier, "This"},
+ {lldb_private::dil::Token::identifier, "string"},
+ {lldb_private::dil::Token::identifier, "has"},
+ {lldb_private::dil::Token::l_paren, "("},
+ {lldb_private::dil::Token::identifier, "several"},
+ {lldb_private::dil::Token::r_paren, ")"},
+ {lldb_private::dil::Token::coloncolon, "::"},
+ {lldb_private::dil::Token::identifier, "identifiers"},
+ };
+
+ EXPECT_TRUE(VerifyExpectedTokens(lexer, expected_tokens, 0));
+
+ token = lexer.GetNextToken();
+ EXPECT_EQ(token.GetSpelling(), "");
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, IdentifiersTest) {
@@ -166,28 +180,39 @@ TEST(DILLexerTests, IdentifiersTest) {
};
// Verify that all of the valid identifiers come out as identifier tokens.
- for (auto str : valid_identifiers) {
- StringRef dil_input_expr(str);
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
-
- dil_lexer.Lex(dil_token);
- EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier);
+ for (auto &str : valid_identifiers) {
+ SCOPED_TRACE(str);
+ lldb_private::dil::DILLexer lexer(str);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
+
+ auto success = lexer.LexAll();
+ if (!success) {
+ EXPECT_TRUE(false);
+ }
+ token = lexer.GetNextToken();
+ EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
}
// Verify that none of the invalid identifiers come out as identifier tokens.
- for (auto str : invalid_identifiers) {
- StringRef dil_input_expr(str);
- lldb_private::dil::DILLexer dil_lexer(dil_input_expr);
- lldb_private::dil::DILToken dil_token;
- dil_token.setKind(lldb_private::dil::TokenKind::unknown);
-
- dil_lexer.Lex(dil_token);
- EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier));
- EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::unknown,
- lldb_private::dil::TokenKind::none,
- lldb_private::dil::TokenKind::eof,
- lldb_private::dil::TokenKind::kw_namespace));
+ for (auto &str : invalid_identifiers) {
+ SCOPED_TRACE(str);
+ lldb_private::dil::DILLexer lexer(str);
+ lldb_private::dil::Token token;
+ token.SetKind(lldb_private::dil::Token::unknown);
+
+ auto success = lexer.LexAll();
+ // In this case, it's ok for Lex() to return an error.
+ if (!success) {
+ llvm::consumeError(success.takeError());
+ } else {
+ // We didn't get an error; make sure we did not get an identifier token.
+ token = lexer.GetNextToken();
+ EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier));
+ EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown,
+ lldb_private::dil::Token::none,
+ lldb_private::dil::Token::eof,
+ lldb_private::dil::Token::kw_namespace));
+ }
}
}
>From ccf5203595ec22d2e58d774ecbe58cdccfc2f106 Mon Sep 17 00:00:00 2001
From: Caroline Tice <cmtice at google.com>
Date: Thu, 30 Jan 2025 15:07:33 -0800
Subject: [PATCH 4/5] Address latest review comments:
- Remove 'namespace' as a keyword (make it a normal identifier)
- Remove 'invalid' and 'none' token types.
- Remove unnecessary SetKind and GetLength methods from Tokens.
- Re-arrange Lexer:
- Give it a static Create method, which pre-lexes all the tokens
- Make Lex method static
- Pull IsWord method out of Lexer class
- Make the Lexer constructor private.
- Remove LexAll, GetLocation, UpdateLexedTokens, AcceptLookAhead, GetNextToken,
and IncrementTokenIdx methods from Lexer class.
- Add new 'Advance' method (to help replace some of the removed methods).
- Update indexing in LookAhead (LookAhead(0) now means the 'current' token).
- Remove m_cur_pos data member from Lexer class.
- Replace m_invalid_token with m_eof_token.
- Use 'remainder' StringRef to help with lexing.
- Update the unit tests to handle all the code changes in the Lexer.
- Update the unit tests to use ASSERT_THAT_EXPECTED to check llvm::Expected
return values.
- Update the unit tests to use "testing::ElementsAre(testing::Pair ..." to
verify all the lexed tokens; also added helper function ExtractTokenData, and
deleted function VerifyExpectedTokens.
---
lldb/include/lldb/ValueObject/DILLexer.h | 97 +++-----
lldb/source/ValueObject/DILLexer.cpp | 153 ++++--------
lldb/unittests/ValueObject/CMakeLists.txt | 1 +
lldb/unittests/ValueObject/DILLexerTests.cpp | 232 ++++++++-----------
4 files changed, 187 insertions(+), 296 deletions(-)
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 61e5fe622e51e6..9e6cec18a68672 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -18,9 +18,7 @@
#include <string>
#include <vector>
-namespace lldb_private {
-
-namespace dil {
+namespace lldb_private::dil {
/// Class defining the tokens generated by the DIL lexer and used by the
/// DIL parser.
@@ -30,10 +28,7 @@ class Token {
coloncolon,
eof,
identifier,
- invalid,
- kw_namespace,
l_paren,
- none,
r_paren,
unknown,
};
@@ -41,16 +36,10 @@ class Token {
Token(Kind kind, std::string spelling, uint32_t start)
: m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
- Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {}
-
- void SetKind(Kind kind) { m_kind = kind; }
-
Kind GetKind() const { return m_kind; }
std::string GetSpelling() const { return m_spelling; }
- uint32_t GetLength() const { return m_spelling.size(); }
-
bool Is(Kind kind) const { return m_kind == kind; }
bool IsNot(Kind kind) const { return m_kind != kind; }
@@ -74,72 +63,58 @@ class Token {
/// Class for doing the simple lexing required by DIL.
class DILLexer {
public:
- DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) {
- m_cur_pos = m_expr.begin();
- // Use UINT_MAX to indicate invalid/uninitialized value.
- m_tokens_idx = UINT_MAX;
- m_invalid_token = Token(Token::invalid, "", 0);
- }
-
- llvm::Expected<bool> LexAll();
-
- /// Return the lexed token N+1 positions ahead of the 'current' token
- /// being handled by the DIL parser.
- const Token &LookAhead(uint32_t N);
-
- const Token &AcceptLookAhead(uint32_t N);
-
- const Token &GetNextToken();
-
- /// Return the index for the 'current' token being handled by the DIL parser.
- uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+ /// Lexes all the tokens in expr and calls the private constructor
+ /// with the lexed tokens.
+ static llvm::Expected<DILLexer> Create(llvm::StringRef expr);
/// Return the current token to be handled by the DIL parser.
const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
- uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
+ /// Advance the current token position by N.
+ void Advance(uint32_t N = 1) {
+ // UINT_MAX means uninitialized, no "current" position, so move to start.
+ if (m_tokens_idx == UINT_MAX)
+ m_tokens_idx = 0;
+ else if (m_tokens_idx + N >= m_lexed_tokens.size())
+ // N is too large; advance to the end of the lexed tokens.
+ m_tokens_idx = m_lexed_tokens.size() - 1;
+ else
+ m_tokens_idx += N;
+ }
- /// Update the index for the 'current' token, to point to the next lexed
- /// token.
- bool IncrementTokenIdx() {
- if (m_tokens_idx >= m_lexed_tokens.size() - 1)
- return false;
+ /// Return the lexed token N positions ahead of the 'current' token
+ /// being handled by the DIL parser.
+ const Token &LookAhead(uint32_t N) {
+ if (m_tokens_idx + N < m_lexed_tokens.size())
+ return m_lexed_tokens[m_tokens_idx + N];
- m_tokens_idx++;
- return true;
+ return m_eof_token;
}
+ /// Return the index for the 'current' token being handled by the DIL parser.
+ uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
/// Set the index for the 'current' token (to be handled by the parser)
/// to a particular position. Used for either committing 'look ahead' parsing
/// or rolling back tentative parsing.
- bool ResetTokenIdx(uint32_t new_value) {
- if (new_value > m_lexed_tokens.size() - 1)
- return false;
-
+ void ResetTokenIdx(uint32_t new_value) {
+ assert(new_value == UINT_MAX || new_value < m_lexed_tokens.size());
m_tokens_idx = new_value;
- return true;
}
- uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+ uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
private:
- llvm::Expected<Token> Lex();
+ DILLexer(llvm::StringRef dil_expr, std::vector<Token> lexed_tokens)
+ : m_expr(dil_expr), m_lexed_tokens(lexed_tokens), m_tokens_idx(UINT_MAX),
+ m_eof_token(Token(Token::eof, "", 0)) {}
- llvm::iterator_range<llvm::StringRef::iterator> IsWord();
-
- /// Update 'result' with the other paremeter values, create a
- /// duplicate token, and push the duplicate token onto the vector of
- /// lexed tokens.
- void UpdateLexedTokens(Token &result, Token::Kind tok_kind,
- std::string tok_str, uint32_t tok_pos);
+ static llvm::Expected<Token> Lex(llvm::StringRef expr,
+ llvm::StringRef &remainder);
// The input string we are lexing & parsing.
llvm::StringRef m_expr;
- // The current position of the lexer within m_expr (the character position,
- // within the string, of the next item to be lexed).
- llvm::StringRef::iterator m_cur_pos;
-
// Holds all of the tokens lexed so far.
std::vector<Token> m_lexed_tokens;
@@ -147,12 +122,10 @@ class DILLexer {
// currently trying to parse/handle.
uint32_t m_tokens_idx;
- // "invalid" token; to be returned by lexer when 'look ahead' fails.
- Token m_invalid_token;
+ // "eof" token; to be returned by lexer when 'look ahead' fails.
+ Token m_eof_token;
};
-} // namespace dil
-
-} // namespace lldb_private
+} // namespace lldb_private::dil
#endif // LLDB_VALUEOBJECT_DILLEXER_H_
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index 30e4bcb04e6505..b92bb86c8219c6 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -15,9 +15,7 @@
#include "lldb/Utility/Status.h"
#include "llvm/ADT/StringSwitch.h"
-namespace lldb_private {
-
-namespace dil {
+namespace lldb_private::dil {
llvm::StringRef Token::GetTokenName(Kind kind) {
switch (kind) {
@@ -27,14 +25,8 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
return "eof";
case Kind::identifier:
return "identifier";
- case Kind::invalid:
- return "invalid";
- case Kind::kw_namespace:
- return "namespace";
case Kind::l_paren:
return "l_paren";
- case Kind::none:
- return "none";
case Kind::r_paren:
return "r_paren";
case Kind::unknown:
@@ -50,140 +42,91 @@ static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
// A word starts with a letter, underscore, or dollar sign, followed by
// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
-llvm::iterator_range<llvm::StringRef::iterator> DILLexer::IsWord() {
- llvm::StringRef::iterator start = m_cur_pos;
+static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
+ llvm::StringRef &remainder) {
+ llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
+ llvm::StringRef::iterator start = cur_pos;
bool dollar_start = false;
// Must not start with a digit.
- if (m_cur_pos == m_expr.end() || IsDigit(*m_cur_pos))
- return llvm::make_range(m_cur_pos, m_cur_pos);
+ if (cur_pos == expr.end() || IsDigit(*cur_pos))
+ return std::nullopt;
// First character *may* be a '$', for a register name or convenience
// variable.
- if (*m_cur_pos == '$') {
+ if (*cur_pos == '$') {
dollar_start = true;
- ++m_cur_pos;
+ ++cur_pos;
}
// Contains only letters, digits or underscores
- for (; m_cur_pos != m_expr.end(); ++m_cur_pos) {
- char c = *m_cur_pos;
+ for (; cur_pos != expr.end(); ++cur_pos) {
+ char c = *cur_pos;
if (!IsLetter(c) && !IsDigit(c) && c != '_')
break;
}
// If first char is '$', make sure there's at least one mare char, or it's
// invalid.
- if (dollar_start && (m_cur_pos - start <= 1)) {
- m_cur_pos = start;
- return llvm::make_range(start, start); // Empty range
+ if (dollar_start && (cur_pos - start <= 1)) {
+ cur_pos = start;
+ return std::nullopt;
}
- return llvm::make_range(start, m_cur_pos);
-}
+ if (cur_pos == start)
+ return std::nullopt;
+
+ llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start);
+ if (remainder.consume_front(word))
+ return word;
-void DILLexer::UpdateLexedTokens(Token &result, Token::Kind tok_kind,
- std::string tok_str, uint32_t tok_pos) {
- Token new_token(tok_kind, tok_str, tok_pos);
- result = new_token;
- m_lexed_tokens.push_back(std::move(new_token));
+ return std::nullopt;
}
-llvm::Expected<bool> DILLexer::LexAll() {
- bool done = false;
- while (!done) {
- auto tok_or_err = Lex();
- if (!tok_or_err)
- return tok_or_err.takeError();
- Token token = *tok_or_err;
- if (token.GetKind() == Token::eof) {
- done = true;
+llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
+ std::vector<Token> tokens;
+ llvm::StringRef remainder = expr;
+ do {
+ if (llvm::Expected<Token> t = Lex(expr, remainder)) {
+ tokens.push_back(std::move(*t));
+ } else {
+ return t.takeError();
}
- }
- return true;
+ } while (tokens.back().GetKind() != Token::eof);
+ return DILLexer(expr, std::move(tokens));
}
-llvm::Expected<Token> DILLexer::Lex() {
- Token result;
-
+llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
+ llvm::StringRef &remainder) {
// Skip over whitespace (spaces).
- while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
- m_cur_pos++;
+ remainder = remainder.ltrim();
+ llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
// Check to see if we've reached the end of our input string.
- if (m_cur_pos == m_expr.end()) {
- UpdateLexedTokens(result, Token::eof, "", (uint32_t)m_expr.size());
- return result;
+ if (remainder.empty() || cur_pos == expr.end())
+ return Token(Token::eof, "", (uint32_t)expr.size());
+
+ uint32_t position = cur_pos - expr.begin();
+ std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
+ if (maybe_word) {
+ llvm::StringRef word = *maybe_word;
+ return Token(Token::identifier, word.str(), position);
}
- uint32_t position = m_cur_pos - m_expr.begin();
- llvm::StringRef::iterator start = m_cur_pos;
- llvm::iterator_range<llvm::StringRef::iterator> word_range = IsWord();
- if (!word_range.empty()) {
- uint32_t length = word_range.end() - word_range.begin();
- llvm::StringRef word(m_expr.substr(position, length));
- // We will be adding more keywords here in the future...
- Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
- .Case("namespace", Token::kw_namespace)
- .Default(Token::identifier);
- UpdateLexedTokens(result, kind, word.str(), position);
- return result;
- }
-
- m_cur_pos = start;
- llvm::StringRef remainder(m_expr.substr(position, m_expr.end() - m_cur_pos));
- std::vector<std::pair<Token::Kind, const char *>> operators = {
+ constexpr std::pair<Token::Kind, const char *> operators[] = {
{Token::l_paren, "("},
{Token::r_paren, ")"},
{Token::coloncolon, "::"},
};
for (auto [kind, str] : operators) {
if (remainder.consume_front(str)) {
- m_cur_pos += strlen(str);
- UpdateLexedTokens(result, kind, str, position);
- return result;
+ cur_pos += strlen(str);
+ return Token(kind, str, position);
}
}
// Unrecognized character(s) in string; unable to lex it.
- Status error("Unable to lex input string");
- return error.ToError();
-}
-
-const Token &DILLexer::LookAhead(uint32_t N) {
- if (m_tokens_idx + N + 1 < m_lexed_tokens.size())
- return m_lexed_tokens[m_tokens_idx + N + 1];
-
- return m_invalid_token;
+ return llvm::createStringError("Unable to lex input string");
}
-const Token &DILLexer::AcceptLookAhead(uint32_t N) {
- if (m_tokens_idx + N + 1 > m_lexed_tokens.size())
- return m_invalid_token;
-
- m_tokens_idx += N + 1;
- return m_lexed_tokens[m_tokens_idx];
-}
-
-const Token &DILLexer::GetNextToken() {
- if (m_tokens_idx == UINT_MAX)
- m_tokens_idx = 0;
- else
- m_tokens_idx++;
-
- // Return the next token in the vector of lexed tokens.
- if (m_tokens_idx < m_lexed_tokens.size())
- return m_lexed_tokens[m_tokens_idx];
-
- // We're already at/beyond the end of our lexed tokens. If the last token
- // is an eof token, return it.
- if (m_lexed_tokens[m_lexed_tokens.size() - 1].GetKind() == Token::eof)
- return m_lexed_tokens[m_lexed_tokens.size() - 1];
-
- // Return the invalid token.
- return m_invalid_token;
-}
-
-} // namespace dil
-
-} // namespace lldb_private
+} // namespace lldb_private::dil
diff --git a/lldb/unittests/ValueObject/CMakeLists.txt b/lldb/unittests/ValueObject/CMakeLists.txt
index 952f5411a98057..14808aa2f213a5 100644
--- a/lldb/unittests/ValueObject/CMakeLists.txt
+++ b/lldb/unittests/ValueObject/CMakeLists.txt
@@ -6,6 +6,7 @@ add_lldb_unittest(LLDBValueObjectTests
lldbValueObject
lldbPluginPlatformLinux
lldbPluginScriptInterpreterNone
+ LLVMTestingSupport
LINK_COMPONENTS
Support
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp
index 137013e40d6adf..f5523d3c5c4068 100644
--- a/lldb/unittests/ValueObject/DILLexerTests.cpp
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -8,100 +8,90 @@
#include "lldb/ValueObject/DILLexer.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <string>
using llvm::StringRef;
-bool VerifyExpectedTokens(
- lldb_private::dil::DILLexer &lexer,
- std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
- exp_tokens,
- uint32_t start_pos) {
- if (lexer.NumLexedTokens() - start_pos < exp_tokens.size())
- return false;
-
- if (start_pos > 0)
- lexer.ResetTokenIdx(start_pos -
- 1); // GetNextToken increments the idx first.
- for (const auto &pair : exp_tokens) {
- lldb_private::dil::Token token = lexer.GetNextToken();
- if (token.GetKind() != pair.first || token.GetSpelling() != pair.second)
- return false;
- }
-
- return true;
+std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
+ExtractTokenData(lldb_private::dil::DILLexer &lexer) {
+ std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> data;
+ if (lexer.NumLexedTokens() == 0)
+ return data;
+
+ lexer.ResetTokenIdx(UINT_MAX);
+ do {
+ lexer.Advance();
+ lldb_private::dil::Token tok = lexer.GetCurrentToken();
+ data.push_back(std::make_pair(tok.GetKind(), tok.GetSpelling()));
+ } while (data.back().first != lldb_private::dil::Token::eof);
+ return data;
}
TEST(DILLexerTests, SimpleTest) {
StringRef input_expr("simple_var");
uint32_t tok_len = 10;
- lldb_private::dil::DILLexer lexer(input_expr);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(input_expr);
+ ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown);
- auto success = lexer.LexAll();
- if (!success) {
- EXPECT_TRUE(false);
- }
- token = lexer.GetNextToken();
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
EXPECT_EQ(token.GetSpelling(), "simple_var");
- EXPECT_EQ(token.GetLength(), tok_len);
- token = lexer.GetNextToken();
- ;
+ EXPECT_EQ(token.GetSpelling().size(), tok_len);
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, TokenKindTest) {
StringRef input_expr("namespace");
- lldb_private::dil::DILLexer lexer(input_expr);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
-
- auto success = lexer.LexAll();
- if (!success) {
- EXPECT_TRUE(false);
- }
- token = lexer.GetNextToken();
-
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::kw_namespace);
- EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier));
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(input_expr);
+ ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
+
+ EXPECT_TRUE(token.Is(lldb_private::dil::Token::identifier));
EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren));
EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof,
- lldb_private::dil::Token::kw_namespace));
+ lldb_private::dil::Token::identifier));
EXPECT_FALSE(token.IsOneOf(
lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren,
lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof));
-
- token.SetKind(lldb_private::dil::Token::identifier);
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
}
TEST(DILLexerTests, LookAheadTest) {
StringRef input_expr("(anonymous namespace)::some_var");
- lldb_private::dil::DILLexer lexer(input_expr);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(input_expr);
+ ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
uint32_t expect_loc = 23;
-
- auto success = lexer.LexAll();
- if (!success) {
- EXPECT_TRUE(false);
- }
- token = lexer.GetNextToken();
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
// Current token is '('; check the next 4 tokens, to make
- // sure they are the identifier 'anonymous', the namespace keyword,
+ // sure they are the identifier 'anonymous', the identifier 'namespace'
// ')' and '::', in that order.
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren);
- EXPECT_EQ(lexer.LookAhead(0).GetKind(), lldb_private::dil::Token::identifier);
- EXPECT_EQ(lexer.LookAhead(0).GetSpelling(), "anonymous");
- EXPECT_EQ(lexer.LookAhead(1).GetKind(),
- lldb_private::dil::Token::kw_namespace);
- EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::r_paren);
- EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::coloncolon);
+ EXPECT_EQ(lexer.LookAhead(1).GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(lexer.LookAhead(1).GetSpelling(), "anonymous");
+ EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(lexer.LookAhead(2).GetSpelling(), "namespace");
+ EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::r_paren);
+ EXPECT_EQ(lexer.LookAhead(4).GetKind(), lldb_private::dil::Token::coloncolon);
// Our current index should still be 0, as we only looked ahead; we are still
// officially on the '('.
@@ -109,110 +99,94 @@ TEST(DILLexerTests, LookAheadTest) {
// Accept the 'lookahead', so our current token is '::', which has the index
// 4 in our vector of tokens (which starts at zero).
- token = lexer.AcceptLookAhead(3);
+ lexer.Advance(4);
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon);
EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4);
- token = lexer.GetNextToken();
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
EXPECT_EQ(token.GetSpelling(), "some_var");
EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5);
// Verify we've advanced our position counter (lexing location) in the
// input 23 characters (the length of '(anonymous namespace)::'.
EXPECT_EQ(token.GetLocation(), expect_loc);
- token = lexer.GetNextToken();
+
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, MultiTokenLexTest) {
StringRef input_expr("This string has (several ) ::identifiers");
- lldb_private::dil::DILLexer lexer(input_expr);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
-
- auto success = lexer.LexAll();
- if (!success) {
- EXPECT_TRUE(false);
- }
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(input_expr);
+ ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
- expected_tokens = {
- {lldb_private::dil::Token::identifier, "This"},
- {lldb_private::dil::Token::identifier, "string"},
- {lldb_private::dil::Token::identifier, "has"},
- {lldb_private::dil::Token::l_paren, "("},
- {lldb_private::dil::Token::identifier, "several"},
- {lldb_private::dil::Token::r_paren, ")"},
- {lldb_private::dil::Token::coloncolon, "::"},
- {lldb_private::dil::Token::identifier, "identifiers"},
- };
-
- EXPECT_TRUE(VerifyExpectedTokens(lexer, expected_tokens, 0));
-
- token = lexer.GetNextToken();
+ lexer_tokens_data = ExtractTokenData(lexer);
+
+ EXPECT_THAT(
+ lexer_tokens_data,
+ testing::ElementsAre(
+ testing::Pair(lldb_private::dil::Token::identifier, "This"),
+ testing::Pair(lldb_private::dil::Token::identifier, "string"),
+ testing::Pair(lldb_private::dil::Token::identifier, "has"),
+ testing::Pair(lldb_private::dil::Token::l_paren, "("),
+ testing::Pair(lldb_private::dil::Token::identifier, "several"),
+ testing::Pair(lldb_private::dil::Token::r_paren, ")"),
+ testing::Pair(lldb_private::dil::Token::coloncolon, "::"),
+ testing::Pair(lldb_private::dil::Token::identifier, "identifiers"),
+ testing::Pair(lldb_private::dil::Token::eof, "")));
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetSpelling(), "");
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
}
TEST(DILLexerTests, IdentifiersTest) {
std::vector<std::string> valid_identifiers = {
- "$My_name1",
- "$pc",
- "abcd",
- "ab cd",
- "_",
- "_a",
- "_a_",
- "a_b",
- "this",
- "self",
- "a",
- "MyName"
- };
- std::vector<std::string> invalid_identifiers = {
- "234",
- "2a",
- "2",
- "$",
- "1MyName",
- "",
- "namespace"
- };
+ "$My_name1", "$pc", "abcd", "ab cd", "_", "_a", "_a_",
+ "a_b", "this", "self", "a", "MyName", "namespace"};
+ std::vector<std::string> invalid_identifiers = {"234", "2a", "2",
+ "$", "1MyName", ""};
// Verify that all of the valid identifiers come out as identifier tokens.
for (auto &str : valid_identifiers) {
SCOPED_TRACE(str);
- lldb_private::dil::DILLexer lexer(str);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
-
- auto success = lexer.LexAll();
- if (!success) {
- EXPECT_TRUE(false);
- }
- token = lexer.GetNextToken();
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(str);
+ ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
}
// Verify that none of the invalid identifiers come out as identifier tokens.
for (auto &str : invalid_identifiers) {
SCOPED_TRACE(str);
- lldb_private::dil::DILLexer lexer(str);
- lldb_private::dil::Token token;
- token.SetKind(lldb_private::dil::Token::unknown);
-
- auto success = lexer.LexAll();
- // In this case, it's ok for Lex() to return an error.
- if (!success) {
- llvm::consumeError(success.takeError());
+ llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
+ lldb_private::dil::DILLexer::Create(str);
+ if (!maybe_lexer) {
+ llvm::consumeError(maybe_lexer.takeError());
+ // In this case, it's ok for lexing to return an error.
} else {
+ lldb_private::dil::DILLexer lexer(*maybe_lexer);
+ lldb_private::dil::Token token =
+ lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
// We didn't get an error; make sure we did not get an identifier token.
- token = lexer.GetNextToken();
+ lexer.Advance();
+ token = lexer.GetCurrentToken();
EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier));
EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown,
- lldb_private::dil::Token::none,
- lldb_private::dil::Token::eof,
- lldb_private::dil::Token::kw_namespace));
+ lldb_private::dil::Token::eof));
}
}
}
>From 29e9f265ea342e84372c63adbfdac0882d2fd434 Mon Sep 17 00:00:00 2001
From: Caroline Tice <cmtice at google.com>
Date: Sun, 2 Feb 2025 22:06:04 -0800
Subject: [PATCH 5/5] Address more review comments:
- Use std::move on std::string & std::vector in constructor initializers.
- Remove some unnecessary code.
- Update ExtractTokenData (helper function in unit tests) to set up the lexer
and do the lexing inside the function; return an llvm::Expected value.
- Add 'using namespace lldb_private::dil;' to unit tests; clean up tests
accordingly.
- Minor code cleanups in the unit tests.
---
lldb/include/lldb/ValueObject/DILLexer.h | 9 +-
lldb/source/ValueObject/DILLexer.cpp | 10 +-
lldb/unittests/ValueObject/DILLexerTests.cpp | 162 ++++++++-----------
3 files changed, 72 insertions(+), 109 deletions(-)
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 9e6cec18a68672..3935bf7e8e5c5e 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -34,7 +34,7 @@ class Token {
};
Token(Kind kind, std::string spelling, uint32_t start)
- : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+ : m_kind(kind), m_spelling(std::move(spelling)), m_start_pos(start) {}
Kind GetKind() const { return m_kind; }
@@ -88,7 +88,8 @@ class DILLexer {
if (m_tokens_idx + N < m_lexed_tokens.size())
return m_lexed_tokens[m_tokens_idx + N];
- return m_eof_token;
+ // Last token should be an 'eof' token.
+ return m_lexed_tokens.back();
}
/// Return the index for the 'current' token being handled by the DIL parser.
@@ -106,8 +107,8 @@ class DILLexer {
private:
DILLexer(llvm::StringRef dil_expr, std::vector<Token> lexed_tokens)
- : m_expr(dil_expr), m_lexed_tokens(lexed_tokens), m_tokens_idx(UINT_MAX),
- m_eof_token(Token(Token::eof, "", 0)) {}
+ : m_expr(dil_expr), m_lexed_tokens(std::move(lexed_tokens)),
+ m_tokens_idx(UINT_MAX), m_eof_token(Token(Token::eof, "", 0)) {}
static llvm::Expected<Token> Lex(llvm::StringRef expr,
llvm::StringRef &remainder);
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index b92bb86c8219c6..46ecea9b585f56 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -108,10 +108,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
uint32_t position = cur_pos - expr.begin();
std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
- if (maybe_word) {
- llvm::StringRef word = *maybe_word;
- return Token(Token::identifier, word.str(), position);
- }
+ if (maybe_word)
+ return Token(Token::identifier, maybe_word->str(), position);
constexpr std::pair<Token::Kind, const char *> operators[] = {
{Token::l_paren, "("},
@@ -119,10 +117,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
{Token::coloncolon, "::"},
};
for (auto [kind, str] : operators) {
- if (remainder.consume_front(str)) {
- cur_pos += strlen(str);
+ if (remainder.consume_front(str))
return Token(kind, str, position);
- }
}
// Unrecognized character(s) in string; unable to lex it.
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp
index f5523d3c5c4068..b6858246b8850c 100644
--- a/lldb/unittests/ValueObject/DILLexerTests.cpp
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -14,179 +14,145 @@
using llvm::StringRef;
-std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
-ExtractTokenData(lldb_private::dil::DILLexer &lexer) {
- std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> data;
+using namespace lldb_private::dil;
+
+llvm::Expected<std::vector<std::pair<Token::Kind, std::string>>>
+ExtractTokenData(llvm::StringRef input_expr) {
+
+ llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr);
+ if (!maybe_lexer)
+ return maybe_lexer.takeError();
+ DILLexer lexer(*maybe_lexer);
+
if (lexer.NumLexedTokens() == 0)
- return data;
+ return llvm::createStringError("No lexed tokens");
lexer.ResetTokenIdx(UINT_MAX);
+ std::vector<std::pair<Token::Kind, std::string>> data;
do {
lexer.Advance();
- lldb_private::dil::Token tok = lexer.GetCurrentToken();
+ Token tok = lexer.GetCurrentToken();
data.push_back(std::make_pair(tok.GetKind(), tok.GetSpelling()));
- } while (data.back().first != lldb_private::dil::Token::eof);
+ } while (data.back().first != Token::eof);
return data;
}
TEST(DILLexerTests, SimpleTest) {
StringRef input_expr("simple_var");
- uint32_t tok_len = 10;
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(input_expr);
+ llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr);
ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown);
+ DILLexer lexer(*maybe_lexer);
+ Token token = Token(Token::unknown, "", 0);
+ EXPECT_EQ(token.GetKind(), Token::unknown);
lexer.Advance();
token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetKind(), Token::identifier);
EXPECT_EQ(token.GetSpelling(), "simple_var");
- EXPECT_EQ(token.GetSpelling().size(), tok_len);
lexer.Advance();
token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
+ EXPECT_EQ(token.GetKind(), Token::eof);
}
TEST(DILLexerTests, TokenKindTest) {
- StringRef input_expr("namespace");
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(input_expr);
- ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
- lexer.Advance();
- token = lexer.GetCurrentToken();
+ Token token = Token(Token::identifier, "ident", 0);
- EXPECT_TRUE(token.Is(lldb_private::dil::Token::identifier));
- EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren));
- EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof,
- lldb_private::dil::Token::identifier));
- EXPECT_FALSE(token.IsOneOf(
- lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren,
- lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof));
+ EXPECT_TRUE(token.Is(Token::identifier));
+ EXPECT_FALSE(token.Is(Token::l_paren));
+ EXPECT_TRUE(token.IsOneOf(Token::eof, Token::identifier));
+ EXPECT_FALSE(token.IsOneOf(Token::l_paren, Token::r_paren, Token::coloncolon,
+ Token::eof));
}
TEST(DILLexerTests, LookAheadTest) {
StringRef input_expr("(anonymous namespace)::some_var");
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(input_expr);
+ llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr);
ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
- uint32_t expect_loc = 23;
+ DILLexer lexer(*maybe_lexer);
+ Token token = Token(Token::unknown, "", 0);
lexer.Advance();
token = lexer.GetCurrentToken();
// Current token is '('; check the next 4 tokens, to make
// sure they are the identifier 'anonymous', the identifier 'namespace'
// ')' and '::', in that order.
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren);
- EXPECT_EQ(lexer.LookAhead(1).GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetKind(), Token::l_paren);
+ EXPECT_EQ(lexer.LookAhead(1).GetKind(), Token::identifier);
EXPECT_EQ(lexer.LookAhead(1).GetSpelling(), "anonymous");
- EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(lexer.LookAhead(2).GetKind(), Token::identifier);
EXPECT_EQ(lexer.LookAhead(2).GetSpelling(), "namespace");
- EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::r_paren);
- EXPECT_EQ(lexer.LookAhead(4).GetKind(), lldb_private::dil::Token::coloncolon);
+ EXPECT_EQ(lexer.LookAhead(3).GetKind(), Token::r_paren);
+ EXPECT_EQ(lexer.LookAhead(4).GetKind(), Token::coloncolon);
// Our current index should still be 0, as we only looked ahead; we are still
// officially on the '('.
- EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)0);
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), 0u);
// Accept the 'lookahead', so our current token is '::', which has the index
// 4 in our vector of tokens (which starts at zero).
lexer.Advance(4);
token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon);
- EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4);
+ EXPECT_EQ(token.GetKind(), Token::coloncolon);
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), 4u);
lexer.Advance();
token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_EQ(token.GetKind(), Token::identifier);
EXPECT_EQ(token.GetSpelling(), "some_var");
- EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5);
- // Verify we've advanced our position counter (lexing location) in the
- // input 23 characters (the length of '(anonymous namespace)::'.
- EXPECT_EQ(token.GetLocation(), expect_loc);
+ EXPECT_EQ(lexer.GetCurrentTokenIdx(), 5u);
+ EXPECT_EQ(token.GetLocation(), strlen("(anonymous namespace)::"));
lexer.Advance();
token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
+ EXPECT_EQ(token.GetKind(), Token::eof);
}
TEST(DILLexerTests, MultiTokenLexTest) {
- StringRef input_expr("This string has (several ) ::identifiers");
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(input_expr);
- ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
-
- std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>>
- lexer_tokens_data = ExtractTokenData(lexer);
-
- EXPECT_THAT(
- lexer_tokens_data,
- testing::ElementsAre(
- testing::Pair(lldb_private::dil::Token::identifier, "This"),
- testing::Pair(lldb_private::dil::Token::identifier, "string"),
- testing::Pair(lldb_private::dil::Token::identifier, "has"),
- testing::Pair(lldb_private::dil::Token::l_paren, "("),
- testing::Pair(lldb_private::dil::Token::identifier, "several"),
- testing::Pair(lldb_private::dil::Token::r_paren, ")"),
- testing::Pair(lldb_private::dil::Token::coloncolon, "::"),
- testing::Pair(lldb_private::dil::Token::identifier, "identifiers"),
- testing::Pair(lldb_private::dil::Token::eof, "")));
- lexer.Advance();
- token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetSpelling(), "");
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof);
+ EXPECT_THAT_EXPECTED(
+ ExtractTokenData("This string has (several ) ::identifiers"),
+ llvm::HasValue(
+ testing::ElementsAre(testing::Pair(Token::identifier, "This"),
+ testing::Pair(Token::identifier, "string"),
+ testing::Pair(Token::identifier, "has"),
+ testing::Pair(Token::l_paren, "("),
+ testing::Pair(Token::identifier, "several"),
+ testing::Pair(Token::r_paren, ")"),
+ testing::Pair(Token::coloncolon, "::"),
+ testing::Pair(Token::identifier, "identifiers"),
+ testing::Pair(Token::eof, ""))));
}
TEST(DILLexerTests, IdentifiersTest) {
std::vector<std::string> valid_identifiers = {
- "$My_name1", "$pc", "abcd", "ab cd", "_", "_a", "_a_",
- "a_b", "this", "self", "a", "MyName", "namespace"};
+ "$My_name1", "$pc", "abcd", "_", "_a", "_a_",
+ "a_b", "this", "self", "a", "MyName", "namespace"};
std::vector<std::string> invalid_identifiers = {"234", "2a", "2",
"$", "1MyName", ""};
// Verify that all of the valid identifiers come out as identifier tokens.
for (auto &str : valid_identifiers) {
SCOPED_TRACE(str);
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(str);
- ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
- lexer.Advance();
- token = lexer.GetCurrentToken();
- EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier);
+ EXPECT_THAT_EXPECTED(ExtractTokenData(str),
+ llvm::HasValue(testing::ElementsAre(
+ testing::Pair(Token::identifier, str),
+ testing::Pair(Token::eof, ""))));
}
// Verify that none of the invalid identifiers come out as identifier tokens.
for (auto &str : invalid_identifiers) {
SCOPED_TRACE(str);
- llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer =
- lldb_private::dil::DILLexer::Create(str);
+ llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(str);
if (!maybe_lexer) {
llvm::consumeError(maybe_lexer.takeError());
// In this case, it's ok for lexing to return an error.
} else {
- lldb_private::dil::DILLexer lexer(*maybe_lexer);
- lldb_private::dil::Token token =
- lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0);
+ DILLexer lexer(*maybe_lexer);
+ Token token = Token(Token::unknown, "", 0);
// We didn't get an error; make sure we did not get an identifier token.
lexer.Advance();
token = lexer.GetCurrentToken();
- EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier));
- EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown,
- lldb_private::dil::Token::eof));
+ EXPECT_TRUE(token.IsNot(Token::identifier));
+ EXPECT_TRUE(token.IsOneOf(Token::unknown, Token::eof));
}
}
}
More information about the lldb-commits
mailing list