[Lldb-commits] [lldb] [lldb] Add tree-sitter based Rust syntax highlighting (PR #181282)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Mon Feb 16 15:46:08 PST 2026
https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/181282
>From 4987ef0c6bbc134367193ee8b26d0658f2985814 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Mon, 16 Feb 2026 15:36:45 -0800
Subject: [PATCH] [lldb] Add tree-sitter based Rust syntax highlighting
---
.../Highlighter/TreeSitter/CMakeLists.txt | 1 +
.../Plugins/Highlighter/TreeSitter/README.md | 1 +
.../TreeSitter/Rust/CMakeLists.txt | 16 +
.../TreeSitter/Rust/HighlightQuery.h.in | 17 +
.../Rust/RustTreeSitterHighlighter.cpp | 206 ++
.../Rust/RustTreeSitterHighlighter.h | 41 +
.../TreeSitter/Rust/tree-sitter-rust/LICENSE | 21 +
.../Rust/tree-sitter-rust/grammar.js | 1690 +++++++++++++++++
.../Rust/tree-sitter-rust/highlights.scm | 161 ++
.../Rust/tree-sitter-rust/scanner.c | 393 ++++
.../Rust/tree-sitter-rust/tree-sitter.json | 53 +
lldb/unittests/Highlighter/CMakeLists.txt | 7 +-
.../unittests/Highlighter/HighlighterTest.cpp | 128 +-
13 files changed, 2732 insertions(+), 3 deletions(-)
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/CMakeLists.txt
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/HighlightQuery.h.in
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.cpp
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/LICENSE
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/grammar.js
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/highlights.scm
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/scanner.c
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/tree-sitter.json
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index 9236725b82b13..d36e888d58546 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -41,4 +41,5 @@ function(add_tree_sitter_grammar name source_dir binary_dir)
)
endfunction()
+add_subdirectory(Rust)
add_subdirectory(Swift)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/README.md b/lldb/source/Plugins/Highlighter/TreeSitter/README.md
index 3a631b6879b36..cc8405be5d196 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/README.md
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/README.md
@@ -14,3 +14,4 @@ Each plugin contains a vendored copy of the corresponding grammar in the
## Supported Languages
- Swift based on [swift-tree-sitter](https://github.com/tree-sitter/swift-tree-sitter) 0.9.0
+- Rust based on [tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) 0.24.0
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/CMakeLists.txt
new file mode 100644
index 0000000000000..25499022937cb
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_lldb_library(lldbPluginHighlighterTreeSitterRust PLUGIN
+ RustTreeSitterHighlighter.cpp
+
+ LINK_COMPONENTS
+ Support
+ LINK_LIBS
+ lldbCore
+ lldbUtility
+ lldbTreeSitter
+ tree-sitter-rust
+)
+
+add_tree_sitter_grammar(tree-sitter-rust
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/HighlightQuery.h.in b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/HighlightQuery.h.in
new file mode 100644
index 0000000000000..4b5803efb97ec
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/HighlightQuery.h.in
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H
+
+#include "llvm/ADT/StringRef.h"
+
+static constexpr llvm::StringLiteral highlight_query =
+ R"__(@HIGHLIGHT_QUERY@)__";
+
+#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.cpp
new file mode 100644
index 0000000000000..3b28758548828
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.cpp
@@ -0,0 +1,206 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RustTreeSitterHighlighter.h"
+#include "lldb/Target/Language.h"
+
+LLDB_PLUGIN_DEFINE_ADV(RustTreeSitterHighlighter, HighlighterTreeSitterRust)
+
+extern "C" {
+const TSLanguage *tree_sitter_rust();
+}
+
+using namespace lldb_private;
+
+const TSLanguage *RustTreeSitterHighlighter::GetLanguage() const {
+ return tree_sitter_rust();
+}
+
+llvm::StringRef RustTreeSitterHighlighter::GetHighlightQuery() const {
+ static constexpr llvm::StringLiteral query = R"__(
+; Identifiers
+
+(type_identifier) @type
+(primitive_type) @type.builtin
+(field_identifier) @property
+
+; Identifier conventions
+
+; Assume all-caps names are constants
+((identifier) @constant
+ (#match? @constant "^[A-Z][A-Z\\d_]+$"))
+
+; Assume uppercase names are enum constructors
+((identifier) @constructor
+ (#match? @constructor "^[A-Z]"))
+
+; Assume that uppercase names in paths are types
+((scoped_identifier
+ path: (identifier) @type)
+ (#match? @type "^[A-Z]"))
+((scoped_identifier
+ path: (scoped_identifier
+ name: (identifier) @type))
+ (#match? @type "^[A-Z]"))
+((scoped_type_identifier
+ path: (identifier) @type)
+ (#match? @type "^[A-Z]"))
+((scoped_type_identifier
+ path: (scoped_identifier
+ name: (identifier) @type))
+ (#match? @type "^[A-Z]"))
+
+; Assume all qualified names in struct patterns are enum constructors. (They're
+; either that, or struct names; highlighting both as constructors seems to be
+; the less glaring choice of error, visually.)
+(struct_pattern
+ type: (scoped_type_identifier
+ name: (type_identifier) @constructor))
+
+; Function calls
+
+(call_expression
+ function: (identifier) @function)
+(call_expression
+ function: (field_expression
+ field: (field_identifier) @function.method))
+(call_expression
+ function: (scoped_identifier
+ "::"
+ name: (identifier) @function))
+
+(generic_function
+ function: (identifier) @function)
+(generic_function
+ function: (scoped_identifier
+ name: (identifier) @function))
+(generic_function
+ function: (field_expression
+ field: (field_identifier) @function.method))
+
+(macro_invocation
+ macro: (identifier) @function.macro
+ "!" @function.macro)
+
+; Function definitions
+
+(function_item (identifier) @function)
+(function_signature_item (identifier) @function)
+
+(line_comment) @comment
+(block_comment) @comment
+
+(line_comment (doc_comment)) @comment.documentation
+(block_comment (doc_comment)) @comment.documentation
+
+"(" @punctuation.bracket
+")" @punctuation.bracket
+"[" @punctuation.bracket
+"]" @punctuation.bracket
+"{" @punctuation.bracket
+"}" @punctuation.bracket
+
+(type_arguments
+ "<" @punctuation.bracket
+ ">" @punctuation.bracket)
+(type_parameters
+ "<" @punctuation.bracket
+ ">" @punctuation.bracket)
+
+"::" @punctuation.delimiter
+":" @punctuation.delimiter
+"." @punctuation.delimiter
+"," @punctuation.delimiter
+";" @punctuation.delimiter
+
+(parameter (identifier) @variable.parameter)
+
+(lifetime (identifier) @label)
+
+"as" @keyword
+"async" @keyword
+"await" @keyword
+"break" @keyword
+"const" @keyword
+"continue" @keyword
+"default" @keyword
+"dyn" @keyword
+"else" @keyword
+"enum" @keyword
+"extern" @keyword
+"fn" @keyword
+"for" @keyword
+"gen" @keyword
+"if" @keyword
+"impl" @keyword
+"in" @keyword
+"let" @keyword
+"loop" @keyword
+"macro_rules!" @keyword
+"match" @keyword
+"mod" @keyword
+"move" @keyword
+"pub" @keyword
+"raw" @keyword
+"ref" @keyword
+"return" @keyword
+"static" @keyword
+"struct" @keyword
+"trait" @keyword
+"type" @keyword
+"union" @keyword
+"unsafe" @keyword
+"use" @keyword
+"where" @keyword
+"while" @keyword
+"yield" @keyword
+(crate) @keyword
+(mutable_specifier) @keyword
+(use_list (self) @keyword)
+(scoped_use_list (self) @keyword)
+(scoped_identifier (self) @keyword)
+(super) @keyword
+
+(self) @variable.builtin
+
+(char_literal) @string
+(string_literal) @string
+(raw_string_literal) @string
+
+(boolean_literal) @constant.builtin
+(integer_literal) @constant.builtin
+(float_literal) @constant.builtin
+
+(escape_sequence) @escape
+
+(attribute_item) @attribute
+(inner_attribute_item) @attribute
+
+"*" @operator
+"&" @operator
+"'" @operator
+)__";
+ // `query` has static storage, so the returned StringRef never dangles.
+ return query;
+}
+
+Highlighter *
+RustTreeSitterHighlighter::CreateInstance(lldb::LanguageType language) {
+ // Only Rust is handled here; every other language yields nullptr.
+ return language == lldb::eLanguageTypeRust ? new RustTreeSitterHighlighter()
+                                            : nullptr;
+}
+
+void RustTreeSitterHighlighter::Initialize() {
+ const llvm::StringRef plugin_name = GetPluginNameStatic();
+ PluginManager::RegisterPlugin(plugin_name, plugin_name, CreateInstance);
+}
+
+void RustTreeSitterHighlighter::Terminate() {
+ PluginManager::UnregisterPlugin(&RustTreeSitterHighlighter::CreateInstance);
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h
new file mode 100644
index 0000000000000..ea0e7321d460e
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H
+
+#include "../TreeSitterHighlighter.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace lldb_private {
+/// TreeSitterHighlighter specialization that supplies the Rust grammar and query.
+class RustTreeSitterHighlighter : public TreeSitterHighlighter {
+public:
+ RustTreeSitterHighlighter() = default;
+ ~RustTreeSitterHighlighter() override = default;
+
+ llvm::StringRef GetName() const override { return "tree-sitter-rust"; }
+
+ static Highlighter *CreateInstance(lldb::LanguageType language);
+
+ static void Terminate();
+ static void Initialize();
+
+ static llvm::StringRef GetPluginNameStatic() {
+ return "Tree-sitter Rust Highlighter";
+ }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+protected:
+ const TSLanguage *GetLanguage() const override;
+ llvm::StringRef GetHighlightQuery() const override;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/LICENSE b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/LICENSE
new file mode 100644
index 0000000000000..ceaf3c9adca54
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2017 Maxim Sokolov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/grammar.js b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/grammar.js
new file mode 100644
index 0000000000000..1907a3a3f56ca
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/grammar.js
@@ -0,0 +1,1690 @@
+/**
+ * @file Rust grammar for tree-sitter
+ * @author Maxim Sokolov <maxim0xff at gmail.com>
+ * @author Max Brunsfeld <maxbrunsfeld at gmail.com>
+ * @author Amaan Qureshi <amaanq12 at gmail.com>
+ * @license MIT
+ */
+
+/// <reference types="tree-sitter-cli/dsl" />
+// @ts-check
+
+// https://doc.rust-lang.org/reference/expressions.html#expression-precedence
+const PREC = {
+ call: 15,
+ field: 14,
+ try: 13,
+ unary: 12,
+ cast: 11,
+ multiplicative: 10,
+ additive: 9,
+ shift: 8,
+ bitand: 7,
+ bitxor: 6,
+ bitor: 5,
+ comparative: 4,
+ and: 3,
+ or: 2,
+ range: 1,
+ assign: 0,
+ closure: -1,
+};
+
+const numericTypes = [
+ 'u8',
+ 'i8',
+ 'u16',
+ 'i16',
+ 'u32',
+ 'i32',
+ 'u64',
+ 'i64',
+ 'u128',
+ 'i128',
+ 'isize',
+ 'usize',
+ 'f32',
+ 'f64',
+];
+
+// https://doc.rust-lang.org/reference/tokens.html#punctuation
+const TOKEN_TREE_NON_SPECIAL_PUNCTUATION = [
+ '+', '-', '*', '/', '%', '^', '!', '&', '|', '&&', '||', '<<',
+ '>>', '+=', '-=', '*=', '/=', '%=', '^=', '&=', '|=', '<<=',
+ '>>=', '=', '==', '!=', '>', '<', '>=', '<=', '@', '_', '.',
+ '..', '...', '..=', ',', ';', ':', '::', '->', '=>', '#', '?',
+];
+
+const primitiveTypes = numericTypes.concat(['bool', 'str', 'char']);
+
+module.exports = grammar({
+ name: 'rust',
+
+ extras: $ => [
+ /\s/,
+ $.line_comment,
+ $.block_comment,
+ ],
+
+ externals: $ => [
+ $.string_content,
+ $._raw_string_literal_start,
+ $.raw_string_literal_content,
+ $._raw_string_literal_end,
+ $.float_literal,
+ $._outer_block_doc_comment_marker,
+ $._inner_block_doc_comment_marker,
+ $._block_comment_content,
+ $._line_doc_content,
+ $._error_sentinel,
+ ],
+
+ supertypes: $ => [
+ $._expression,
+ $._type,
+ $._literal,
+ $._literal_pattern,
+ $._declaration_statement,
+ $._pattern,
+ ],
+
+ inline: $ => [
+ $._path,
+ $._type_identifier,
+ $._tokens,
+ $._field_identifier,
+ $._non_special_token,
+ $._declaration_statement,
+ $._reserved_identifier,
+ $._expression_ending_with_block,
+ ],
+
+ conflicts: $ => [
+ // Local ambiguity due to anonymous types:
+ // See https://internals.rust-lang.org/t/pre-rfc-deprecating-anonymous-parameters/3710
+ [$._type, $._pattern],
+ [$.unit_type, $.tuple_pattern],
+ [$.scoped_identifier, $.scoped_type_identifier],
+ [$.parameters, $._pattern],
+ [$.parameters, $.tuple_struct_pattern],
+ [$.array_expression],
+ [$.visibility_modifier],
+ [$.visibility_modifier, $.scoped_identifier, $.scoped_type_identifier],
+ ],
+
+ word: $ => $.identifier,
+
+ rules: {
+ source_file: $ => seq(
+ optional($.shebang),
+ repeat($._statement),
+ ),
+
+ _statement: $ => choice(
+ $.expression_statement,
+ $._declaration_statement,
+ ),
+
+ empty_statement: _ => ';',
+
+ expression_statement: $ => choice(
+ seq($._expression, ';'),
+ prec(1, $._expression_ending_with_block),
+ ),
+
+ _declaration_statement: $ => choice(
+ $.const_item,
+ $.macro_invocation,
+ $.macro_definition,
+ $.empty_statement,
+ $.attribute_item,
+ $.inner_attribute_item,
+ $.mod_item,
+ $.foreign_mod_item,
+ $.struct_item,
+ $.union_item,
+ $.enum_item,
+ $.type_item,
+ $.function_item,
+ $.function_signature_item,
+ $.impl_item,
+ $.trait_item,
+ $.associated_type,
+ $.let_declaration,
+ $.use_declaration,
+ $.extern_crate_declaration,
+ $.static_item,
+ ),
+
+ // Section - Macro definitions
+
+ macro_definition: $ => {
+ const rules = seq(
+ repeat(seq($.macro_rule, ';')),
+ optional($.macro_rule),
+ );
+
+ return seq(
+ 'macro_rules!',
+ field('name', choice(
+ $.identifier,
+ $._reserved_identifier,
+ )),
+ choice(
+ seq('(', rules, ')', ';'),
+ seq('[', rules, ']', ';'),
+ seq('{', rules, '}'),
+ ),
+ );
+ },
+
+ macro_rule: $ => seq(
+ field('left', $.token_tree_pattern),
+ '=>',
+ field('right', $.token_tree),
+ ),
+
+ _token_pattern: $ => choice(
+ $.token_tree_pattern,
+ $.token_repetition_pattern,
+ $.token_binding_pattern,
+ $.metavariable,
+ $._non_special_token,
+ ),
+
+ token_tree_pattern: $ => choice(
+ seq('(', repeat($._token_pattern), ')'),
+ seq('[', repeat($._token_pattern), ']'),
+ seq('{', repeat($._token_pattern), '}'),
+ ),
+
+ token_binding_pattern: $ => prec(1, seq(
+ field('name', $.metavariable),
+ ':',
+ field('type', $.fragment_specifier),
+ )),
+
+ token_repetition_pattern: $ => seq(
+ '$', '(', repeat($._token_pattern), ')', optional(/[^+*?]+/), choice('+', '*', '?'),
+ ),
+
+ fragment_specifier: _ => choice(
+ 'block', 'expr', 'expr_2021', 'ident', 'item', 'lifetime', 'literal', 'meta', 'pat',
+ 'pat_param', 'path', 'stmt', 'tt', 'ty', 'vis',
+ ),
+
+ _tokens: $ => choice(
+ $.token_tree,
+ $.token_repetition,
+ $.metavariable,
+ $._non_special_token,
+ ),
+
+ token_tree: $ => choice(
+ seq('(', repeat($._tokens), ')'),
+ seq('[', repeat($._tokens), ']'),
+ seq('{', repeat($._tokens), '}'),
+ ),
+
+ token_repetition: $ => seq(
+ '$', '(', repeat($._tokens), ')', optional(/[^+*?]+/), choice('+', '*', '?'),
+ ),
+
+ // Matches non-delimiter tokens common to both macro invocations and
+ // definitions. This is everything except $ and metavariables (which begin
+ // with $).
+ _non_special_token: $ => choice(
+ $._literal, $.identifier, $.mutable_specifier, $.self, $.super, $.crate,
+ alias(choice(...primitiveTypes), $.primitive_type),
+ prec.right(repeat1(choice(...TOKEN_TREE_NON_SPECIAL_PUNCTUATION))),
+ '\'',
+ 'as', 'async', 'await', 'break', 'const', 'continue', 'default', 'enum', 'fn', 'for', 'gen',
+ 'if', 'impl', 'let', 'loop', 'match', 'mod', 'pub', 'return', 'static', 'struct', 'trait',
+ 'type', 'union', 'unsafe', 'use', 'where', 'while',
+ ),
+
+ // Section - Declarations
+
+ attribute_item: $ => seq(
+ '#',
+ '[',
+ $.attribute,
+ ']',
+ ),
+
+ inner_attribute_item: $ => seq(
+ '#',
+ '!',
+ '[',
+ $.attribute,
+ ']',
+ ),
+
+ attribute: $ => seq(
+ $._path,
+ optional(choice(
+ seq('=', field('value', $._expression)),
+ field('arguments', alias($.delim_token_tree, $.token_tree)),
+ )),
+ ),
+
+ mod_item: $ => seq(
+ optional($.visibility_modifier),
+ 'mod',
+ field('name', $.identifier),
+ choice(
+ ';',
+ field('body', $.declaration_list),
+ ),
+ ),
+
+ foreign_mod_item: $ => seq(
+ optional($.visibility_modifier),
+ $.extern_modifier,
+ choice(
+ ';',
+ field('body', $.declaration_list),
+ ),
+ ),
+
+ declaration_list: $ => seq(
+ '{',
+ repeat($._declaration_statement),
+ '}',
+ ),
+
+ struct_item: $ => seq(
+ optional($.visibility_modifier),
+ 'struct',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ choice(
+ seq(
+ optional($.where_clause),
+ field('body', $.field_declaration_list),
+ ),
+ seq(
+ field('body', $.ordered_field_declaration_list),
+ optional($.where_clause),
+ ';',
+ ),
+ ';',
+ ),
+ ),
+
+ union_item: $ => seq(
+ optional($.visibility_modifier),
+ 'union',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ optional($.where_clause),
+ field('body', $.field_declaration_list),
+ ),
+
+ enum_item: $ => seq(
+ optional($.visibility_modifier),
+ 'enum',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ optional($.where_clause),
+ field('body', $.enum_variant_list),
+ ),
+
+ enum_variant_list: $ => seq(
+ '{',
+ sepBy(',', seq(repeat($.attribute_item), $.enum_variant)),
+ optional(','),
+ '}',
+ ),
+
+ enum_variant: $ => seq(
+ optional($.visibility_modifier),
+ field('name', $.identifier),
+ field('body', optional(choice(
+ $.field_declaration_list,
+ $.ordered_field_declaration_list,
+ ))),
+ optional(seq(
+ '=',
+ field('value', $._expression),
+ )),
+ ),
+
+ field_declaration_list: $ => seq(
+ '{',
+ sepBy(',', seq(repeat($.attribute_item), $.field_declaration)),
+ optional(','),
+ '}',
+ ),
+
+ field_declaration: $ => seq(
+ optional($.visibility_modifier),
+ field('name', $._field_identifier),
+ ':',
+ field('type', $._type),
+ ),
+
+ ordered_field_declaration_list: $ => seq(
+ '(',
+ sepBy(',', seq(
+ repeat($.attribute_item),
+ optional($.visibility_modifier),
+ field('type', $._type),
+ )),
+ optional(','),
+ ')',
+ ),
+
+ extern_crate_declaration: $ => seq(
+ optional($.visibility_modifier),
+ 'extern',
+ $.crate,
+ field('name', $.identifier),
+ optional(seq(
+ 'as',
+ field('alias', $.identifier),
+ )),
+ ';',
+ ),
+
+ const_item: $ => seq(
+ optional($.visibility_modifier),
+ 'const',
+ field('name', $.identifier),
+ ':',
+ field('type', $._type),
+ optional(
+ seq(
+ '=',
+ field('value', $._expression),
+ ),
+ ),
+ ';',
+ ),
+
+ static_item: $ => seq(
+ optional($.visibility_modifier),
+ 'static',
+
+ // Not actual rust syntax, but made popular by the lazy_static crate.
+ optional('ref'),
+
+ optional($.mutable_specifier),
+ field('name', $.identifier),
+ ':',
+ field('type', $._type),
+ optional(seq(
+ '=',
+ field('value', $._expression),
+ )),
+ ';',
+ ),
+
+ type_item: $ => seq(
+ optional($.visibility_modifier),
+ 'type',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ optional($.where_clause),
+ '=',
+ field('type', $._type),
+ optional($.where_clause),
+ ';',
+ ),
+
+ function_item: $ => seq(
+ optional($.visibility_modifier),
+ optional($.function_modifiers),
+ 'fn',
+ field('name', choice($.identifier, $.metavariable)),
+ field('type_parameters', optional($.type_parameters)),
+ field('parameters', $.parameters),
+ optional(seq('->', field('return_type', $._type))),
+ optional($.where_clause),
+ field('body', $.block),
+ ),
+
+ function_signature_item: $ => seq(
+ optional($.visibility_modifier),
+ optional($.function_modifiers),
+ 'fn',
+ field('name', choice($.identifier, $.metavariable)),
+ field('type_parameters', optional($.type_parameters)),
+ field('parameters', $.parameters),
+ optional(seq('->', field('return_type', $._type))),
+ optional($.where_clause),
+ ';',
+ ),
+
+ function_modifiers: $ => repeat1(choice(
+ 'async',
+ 'default',
+ 'const',
+ 'unsafe',
+ $.extern_modifier,
+ )),
+
+ where_clause: $ => prec.right(seq(
+ 'where',
+ optional(seq(
+ sepBy1(',', $.where_predicate),
+ optional(','),
+ )),
+ )),
+
+ where_predicate: $ => seq(
+ field('left', choice(
+ $.lifetime,
+ $._type_identifier,
+ $.scoped_type_identifier,
+ $.generic_type,
+ $.reference_type,
+ $.pointer_type,
+ $.tuple_type,
+ $.array_type,
+ $.higher_ranked_trait_bound,
+ alias(choice(...primitiveTypes), $.primitive_type),
+ )),
+ field('bounds', $.trait_bounds),
+ ),
+
+ impl_item: $ => seq(
+ optional('unsafe'),
+ 'impl',
+ field('type_parameters', optional($.type_parameters)),
+ optional(seq(
+ optional('!'),
+ field('trait', choice(
+ $._type_identifier,
+ $.scoped_type_identifier,
+ $.generic_type,
+ )),
+ 'for',
+ )),
+ field('type', $._type),
+ optional($.where_clause),
+ choice(field('body', $.declaration_list), ';'),
+ ),
+
+ trait_item: $ => seq(
+ optional($.visibility_modifier),
+ optional('unsafe'),
+ 'trait',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ field('bounds', optional($.trait_bounds)),
+ optional($.where_clause),
+ field('body', $.declaration_list),
+ ),
+
+ associated_type: $ => seq(
+ 'type',
+ field('name', $._type_identifier),
+ field('type_parameters', optional($.type_parameters)),
+ field('bounds', optional($.trait_bounds)),
+ optional($.where_clause),
+ ';',
+ ),
+
+ trait_bounds: $ => seq(
+ ':',
+ sepBy1('+', choice(
+ $._type,
+ $.lifetime,
+ $.higher_ranked_trait_bound,
+ )),
+ ),
+
+ higher_ranked_trait_bound: $ => seq(
+ 'for',
+ field('type_parameters', $.type_parameters),
+ field('type', $._type),
+ ),
+
+ removed_trait_bound: $ => seq(
+ '?',
+ $._type,
+ ),
+
+ type_parameters: $ => prec(1, seq(
+ '<',
+ sepBy1(',', seq(
+ repeat($.attribute_item),
+ choice(
+ $.metavariable,
+ $.type_parameter,
+ $.lifetime_parameter,
+ $.const_parameter,
+ ),
+ )),
+ optional(','),
+ '>',
+ )),
+
+ const_parameter: $ => seq(
+ 'const',
+ field('name', $.identifier),
+ ':',
+ field('type', $._type),
+ optional(
+ seq(
+ '=',
+ field('value',
+ choice(
+ $.block,
+ $.identifier,
+ $._literal,
+ $.negative_literal,
+ ),
+ ),
+ ),
+ ),
+ ),
+
+ type_parameter: $ => prec(1, seq(
+ field('name', $._type_identifier),
+ optional(field('bounds', $.trait_bounds)),
+ optional(
+ seq(
+ '=',
+ field('default_type', $._type),
+ ),
+ ),
+ )),
+
+ lifetime_parameter: $ => prec(1, seq(
+ field('name', $.lifetime),
+ optional(field('bounds', $.trait_bounds)),
+ )),
+
+ let_declaration: $ => seq(
+ 'let',
+ optional($.mutable_specifier),
+ field('pattern', $._pattern),
+ optional(seq(
+ ':',
+ field('type', $._type),
+ )),
+ optional(seq(
+ '=',
+ field('value', $._expression),
+ )),
+ optional(seq(
+ 'else',
+ field('alternative', $.block),
+ )),
+ ';',
+ ),
+
+ use_declaration: $ => seq(
+ optional($.visibility_modifier),
+ 'use',
+ field('argument', $._use_clause),
+ ';',
+ ),
+
+ _use_clause: $ => choice(
+ $._path,
+ $.use_as_clause,
+ $.use_list,
+ $.scoped_use_list,
+ $.use_wildcard,
+ ),
+
+ scoped_use_list: $ => seq(
+ field('path', optional($._path)),
+ '::',
+ field('list', $.use_list),
+ ),
+
+ use_list: $ => seq(
+ '{',
+ sepBy(',', choice(
+ $._use_clause,
+ )),
+ optional(','),
+ '}',
+ ),
+
+ use_as_clause: $ => seq(
+ field('path', $._path),
+ 'as',
+ field('alias', $.identifier),
+ ),
+
+ use_wildcard: $ => seq(
+ optional(seq(optional($._path), '::')),
+ '*',
+ ),
+
+ parameters: $ => seq(
+ '(',
+ sepBy(',', seq(
+ optional($.attribute_item),
+ choice(
+ $.parameter,
+ $.self_parameter,
+ $.variadic_parameter,
+ '_',
+ $._type,
+ ))),
+ optional(','),
+ ')',
+ ),
+
+ self_parameter: $ => seq(
+ optional('&'),
+ optional($.lifetime),
+ optional($.mutable_specifier),
+ $.self,
+ ),
+
+ variadic_parameter: $ => seq(
+ optional($.mutable_specifier),
+ optional(seq(
+ field('pattern', $._pattern),
+ ':',
+ )),
+ '...',
+ ),
+
+ parameter: $ => seq(
+ optional($.mutable_specifier),
+ field('pattern', choice(
+ $._pattern,
+ $.self,
+ )),
+ ':',
+ field('type', $._type),
+ ),
+
+ extern_modifier: $ => seq(
+ 'extern',
+ optional($.string_literal),
+ ),
+
+ visibility_modifier: $ => choice(
+ $.crate,
+ seq(
+ 'pub',
+ optional(seq(
+ '(',
+ choice(
+ $.self,
+ $.super,
+ $.crate,
+ seq('in', $._path),
+ ),
+ ')',
+ )),
+ ),
+ ),
+
+ // Section - Types
+
+ _type: $ => choice(
+ $.abstract_type,
+ $.reference_type,
+ $.metavariable,
+ $.pointer_type,
+ $.generic_type,
+ $.scoped_type_identifier,
+ $.tuple_type,
+ $.unit_type,
+ $.array_type,
+ $.function_type,
+ $._type_identifier,
+ $.macro_invocation,
+ $.never_type,
+ $.dynamic_type,
+ $.bounded_type,
+ $.removed_trait_bound,
+ alias(choice(...primitiveTypes), $.primitive_type),
+ ),
+
+ bracketed_type: $ => seq(
+ '<',
+ choice(
+ $._type,
+ $.qualified_type,
+ ),
+ '>',
+ ),
+
+ qualified_type: $ => seq(
+ field('type', $._type),
+ 'as',
+ field('alias', $._type),
+ ),
+
+ lifetime: $ => prec(1, seq('\'', $.identifier)),
+
+ array_type: $ => seq(
+ '[',
+ field('element', $._type),
+ optional(seq(
+ ';',
+ field('length', $._expression),
+ )),
+ ']',
+ ),
+
+ for_lifetimes: $ => seq(
+ 'for',
+ '<',
+ sepBy1(',', $.lifetime),
+ optional(','),
+ '>',
+ ),
+
+ function_type: $ => seq(
+ optional($.for_lifetimes),
+ prec(PREC.call, seq(
+ choice(
+ field('trait', choice(
+ $._type_identifier,
+ $.scoped_type_identifier,
+ )),
+ seq(
+ optional($.function_modifiers),
+ 'fn',
+ ),
+ ),
+ field('parameters', $.parameters),
+ )),
+ optional(seq('->', field('return_type', $._type))),
+ ),
+
+ tuple_type: $ => seq(
+ '(',
+ sepBy1(',', $._type),
+ optional(','),
+ ')',
+ ),
+
+ unit_type: _ => seq('(', ')'),
+
+ generic_function: $ => prec(1, seq(
+ field('function', choice(
+ $.identifier,
+ $.scoped_identifier,
+ $.field_expression,
+ )),
+ '::',
+ field('type_arguments', $.type_arguments),
+ )),
+
+ generic_type: $ => prec(1, seq(
+ field('type', choice(
+ $._type_identifier,
+ $._reserved_identifier,
+ $.scoped_type_identifier,
+ )),
+ field('type_arguments', $.type_arguments),
+ )),
+
+ generic_type_with_turbofish: $ => seq(
+ field('type', choice(
+ $._type_identifier,
+ $.scoped_identifier,
+ )),
+ '::',
+ field('type_arguments', $.type_arguments),
+ ),
+
+ bounded_type: $ => prec.left(-1, seq(
+ choice($.lifetime, $._type, $.use_bounds),
+ '+',
+ choice($.lifetime, $._type, $.use_bounds),
+ )),
+
+ use_bounds: $ => seq(
+ 'use',
+ token(prec(1, '<')),
+ sepBy(
+ ',',
+ choice(
+ $.lifetime,
+ $._type_identifier,
+ ),
+ ),
+ optional(','),
+ '>',
+ ),
+
+ type_arguments: $ => seq(
+ token(prec(1, '<')),
+ sepBy1(',', seq(
+ choice(
+ $._type,
+ $.type_binding,
+ $.lifetime,
+ $._literal,
+ $.block,
+ ),
+ optional($.trait_bounds),
+ )),
+ optional(','),
+ '>',
+ ),
+
+ type_binding: $ => seq(
+ field('name', $._type_identifier),
+ field('type_arguments', optional($.type_arguments)),
+ '=',
+ field('type', $._type),
+ ),
+
+ reference_type: $ => seq(
+ '&',
+ optional($.lifetime),
+ optional($.mutable_specifier),
+ field('type', $._type),
+ ),
+
+ pointer_type: $ => seq(
+ '*',
+ choice('const', $.mutable_specifier),
+ field('type', $._type),
+ ),
+
+ never_type: _ => '!',
+
+ abstract_type: $ => seq(
+ 'impl',
+ optional(seq('for', $.type_parameters)),
+ field('trait', prec(1, choice(
+ $._type_identifier,
+ $.scoped_type_identifier,
+ $.removed_trait_bound,
+ $.generic_type,
+ $.function_type,
+ $.tuple_type,
+ $.bounded_type,
+ ))),
+ ),
+
+ dynamic_type: $ => seq(
+ 'dyn',
+ field('trait', choice(
+ $.higher_ranked_trait_bound,
+ $._type_identifier,
+ $.scoped_type_identifier,
+ $.generic_type,
+ $.function_type,
+ $.tuple_type,
+ )),
+ ),
+
+ mutable_specifier: _ => 'mut',
+
+ // Section - Expressions
+
+ _expression_except_range: $ => choice(
+ $.unary_expression,
+ $.reference_expression,
+ $.try_expression,
+ $.binary_expression,
+ $.assignment_expression,
+ $.compound_assignment_expr,
+ $.type_cast_expression,
+ $.call_expression,
+ $.return_expression,
+ $.yield_expression,
+ $._literal,
+ prec.left($.identifier),
+ alias(choice(...primitiveTypes), $.identifier),
+ prec.left($._reserved_identifier),
+ $.self,
+ $.scoped_identifier,
+ $.generic_function,
+ $.await_expression,
+ $.field_expression,
+ $.array_expression,
+ $.tuple_expression,
+ prec(1, $.macro_invocation),
+ $.unit_expression,
+ $.break_expression,
+ $.continue_expression,
+ $.index_expression,
+ $.metavariable,
+ $.closure_expression,
+ $.parenthesized_expression,
+ $.struct_expression,
+ $._expression_ending_with_block,
+ ),
+
+ _expression: $ => choice(
+ $._expression_except_range,
+ $.range_expression,
+ ),
+
+ _expression_ending_with_block: $ => choice(
+ $.unsafe_block,
+ $.async_block,
+ $.gen_block,
+ $.try_block,
+ $.block,
+ $.if_expression,
+ $.match_expression,
+ $.while_expression,
+ $.loop_expression,
+ $.for_expression,
+ $.const_block,
+ ),
+
+ macro_invocation: $ => seq(
+ field('macro', choice(
+ $.scoped_identifier,
+ $.identifier,
+ $._reserved_identifier,
+ )),
+ '!',
+ alias($.delim_token_tree, $.token_tree),
+ ),
+
+ delim_token_tree: $ => choice(
+ seq('(', repeat($._delim_tokens), ')'),
+ seq('[', repeat($._delim_tokens), ']'),
+ seq('{', repeat($._delim_tokens), '}'),
+ ),
+
+ _delim_tokens: $ => choice(
+ $._non_delim_token,
+ alias($.delim_token_tree, $.token_tree),
+ ),
+
+ // Should match any token other than a delimiter.
+ _non_delim_token: $ => choice(
+ $._non_special_token,
+ '$',
+ ),
+
+ scoped_identifier: $ => seq(
+ field('path', optional(choice(
+ $._path,
+ $.bracketed_type,
+ alias($.generic_type_with_turbofish, $.generic_type),
+ ))),
+ '::',
+ field('name', choice($.identifier, $.super)),
+ ),
+
+ scoped_type_identifier_in_expression_position: $ => prec(-2, seq(
+ field('path', optional(choice(
+ $._path,
+ alias($.generic_type_with_turbofish, $.generic_type),
+ ))),
+ '::',
+ field('name', $._type_identifier),
+ )),
+
+ scoped_type_identifier: $ => seq(
+ field('path', optional(choice(
+ $._path,
+ alias($.generic_type_with_turbofish, $.generic_type),
+ $.bracketed_type,
+ $.generic_type,
+ ))),
+ '::',
+ field('name', $._type_identifier),
+ ),
+
+ range_expression: $ => prec.left(PREC.range, choice(
+ seq($._expression, choice('..', '...', '..='), $._expression),
+ seq($._expression, '..'),
+ seq('..', $._expression),
+ '..',
+ )),
+
+ unary_expression: $ => prec(PREC.unary, seq(
+ choice('-', '*', '!'),
+ $._expression,
+ )),
+
+ try_expression: $ => prec(PREC.try, seq(
+ $._expression,
+ '?',
+ )),
+
+ reference_expression: $ => prec(PREC.unary, seq(
+ '&',
+ choice(
+ seq('raw', choice('const', $.mutable_specifier)),
+ optional($.mutable_specifier),
+ ),
+ field('value', $._expression),
+ )),
+
+ binary_expression: $ => {
+ const table = [
+ [PREC.and, '&&'],
+ [PREC.or, '||'],
+ [PREC.bitand, '&'],
+ [PREC.bitor, '|'],
+ [PREC.bitxor, '^'],
+ [PREC.comparative, choice('==', '!=', '<', '<=', '>', '>=')],
+ [PREC.shift, choice('<<', '>>')],
+ [PREC.additive, choice('+', '-')],
+ [PREC.multiplicative, choice('*', '/', '%')],
+ ];
+
+ // @ts-ignore
+ return choice(...table.map(([precedence, operator]) => prec.left(precedence, seq(
+ field('left', $._expression),
+ // @ts-ignore
+ field('operator', operator),
+ field('right', $._expression),
+ ))));
+ },
+
+ assignment_expression: $ => prec.left(PREC.assign, seq(
+ field('left', $._expression),
+ '=',
+ field('right', $._expression),
+ )),
+
+ compound_assignment_expr: $ => prec.left(PREC.assign, seq(
+ field('left', $._expression),
+ field('operator', choice('+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=')),
+ field('right', $._expression),
+ )),
+
+ type_cast_expression: $ => prec.left(PREC.cast, seq(
+ field('value', $._expression),
+ 'as',
+ field('type', $._type),
+ )),
+
+ return_expression: $ => choice(
+ prec.left(seq('return', $._expression)),
+ prec(-1, 'return'),
+ ),
+
+ yield_expression: $ => choice(
+ prec.left(seq('yield', $._expression)),
+ prec(-1, 'yield'),
+ ),
+
+ call_expression: $ => prec(PREC.call, seq(
+ field('function', $._expression_except_range),
+ field('arguments', $.arguments),
+ )),
+
+ arguments: $ => seq(
+ '(',
+ sepBy(',', seq(repeat($.attribute_item), $._expression)),
+ optional(','),
+ ')',
+ ),
+
+ array_expression: $ => seq(
+ '[',
+ repeat($.attribute_item),
+ choice(
+ seq(
+ $._expression,
+ ';',
+ field('length', $._expression),
+ ),
+ seq(
+ sepBy(',', seq(repeat($.attribute_item), $._expression)),
+ optional(','),
+ ),
+ ),
+ ']',
+ ),
+
+ parenthesized_expression: $ => seq(
+ '(',
+ $._expression,
+ ')',
+ ),
+
+ // Tuple literal. At least one `expr,` pair is mandatory, which is what
+ // distinguishes `(a,)` from parenthesized_expression (`(a)`) and from
+ // unit_expression (`()`). Attributes may precede the first element.
+ tuple_expression: $ => seq(
+ '(',
+ repeat($.attribute_item),
+ seq($._expression, ','),
+ repeat(seq($._expression, ',')),
+ optional($._expression),
+ ')',
+ ),
+
+ unit_expression: _ => seq('(', ')'),
+
+ struct_expression: $ => seq(
+ field('name', choice(
+ $._type_identifier,
+ alias($.scoped_type_identifier_in_expression_position, $.scoped_type_identifier),
+ $.generic_type_with_turbofish,
+ )),
+ field('body', $.field_initializer_list),
+ ),
+
+ field_initializer_list: $ => seq(
+ '{',
+ sepBy(',', choice(
+ $.shorthand_field_initializer,
+ $.field_initializer,
+ $.base_field_initializer,
+ )),
+ optional(','),
+ '}',
+ ),
+
+ shorthand_field_initializer: $ => seq(
+ repeat($.attribute_item),
+ $.identifier,
+ ),
+
+ field_initializer: $ => seq(
+ repeat($.attribute_item),
+ field('field', choice($._field_identifier, $.integer_literal)),
+ ':',
+ field('value', $._expression),
+ ),
+
+ base_field_initializer: $ => seq(
+ '..',
+ $._expression,
+ ),
+
+ if_expression: $ => prec.right(seq(
+ 'if',
+ field('condition', $._condition),
+ field('consequence', $.block),
+ optional(field('alternative', $.else_clause)),
+ )),
+
+ let_condition: $ => seq(
+ 'let',
+ field('pattern', $._pattern),
+ '=',
+ field('value', prec.left(PREC.and, $._expression)),
+ ),
+
+ // Left-associative `&&` chain in which at least one operand is a
+ // let_condition (every alternative either contains one directly or recurses
+ // into _let_chain). It is surfaced as `let_chain` via the alias in _condition.
+ _let_chain: $ => prec.left(PREC.and, choice(
+ seq($._let_chain, '&&', $.let_condition),
+ seq($._let_chain, '&&', $._expression),
+ seq($.let_condition, '&&', $._expression),
+ seq($.let_condition, '&&', $.let_condition),
+ seq($._expression, '&&', $.let_condition),
+ )),
+
+ _condition: $ => choice(
+ $._expression,
+ $.let_condition,
+ alias($._let_chain, $.let_chain),
+ ),
+
+ else_clause: $ => seq(
+ 'else',
+ choice(
+ $.block,
+ $.if_expression,
+ ),
+ ),
+
+ match_expression: $ => seq(
+ 'match',
+ field('value', $._expression),
+ field('body', $.match_block),
+ ),
+
+ match_block: $ => seq(
+ '{',
+ optional(seq(
+ repeat($.match_arm),
+ alias($.last_match_arm, $.match_arm),
+ )),
+ '}',
+ ),
+
+ // A match arm that is followed by further arms (match_block uses
+ // last_match_arm for the final one). Its value is either an expression with
+ // a mandatory trailing ',' or a block-ending expression with no comma.
+ match_arm: $ => prec.right(seq(
+ repeat(choice($.attribute_item, $.inner_attribute_item)),
+ field('pattern', $.match_pattern),
+ '=>',
+ choice(
+ seq(field('value', $._expression), ','),
+ field('value', prec(1, $._expression_ending_with_block)),
+ ),
+ )),
+
+ // Final arm of a match_block (aliased to match_arm there). Unlike match_arm,
+ // any expression is accepted as the value and the trailing ',' is optional.
+ last_match_arm: $ => seq(
+ repeat(choice($.attribute_item, $.inner_attribute_item)),
+ field('pattern', $.match_pattern),
+ '=>',
+ field('value', $._expression),
+ optional(','),
+ ),
+
+ match_pattern: $ => seq(
+ $._pattern,
+ optional(seq('if', field('condition', $._condition))),
+ ),
+
+ while_expression: $ => seq(
+ optional(seq($.label, ':')),
+ 'while',
+ field('condition', $._condition),
+ field('body', $.block),
+ ),
+
+ loop_expression: $ => seq(
+ optional(seq($.label, ':')),
+ 'loop',
+ field('body', $.block),
+ ),
+
+ for_expression: $ => seq(
+ optional(seq($.label, ':')),
+ 'for',
+ field('pattern', $._pattern),
+ 'in',
+ field('value', $._expression),
+ field('body', $.block),
+ ),
+
+ const_block: $ => seq(
+ 'const',
+ field('body', $.block),
+ ),
+
+ closure_expression: $ => prec(PREC.closure, seq(
+ optional('static'),
+ optional('async'),
+ optional('move'),
+ field('parameters', $.closure_parameters),
+ choice(
+ seq(
+ optional(seq('->', field('return_type', $._type))),
+ field('body', $.block),
+ ),
+ field('body', choice($._expression, '_')),
+ ),
+ )),
+
+ closure_parameters: $ => seq(
+ '|',
+ sepBy(',', choice(
+ $._pattern,
+ $.parameter,
+ )),
+ '|',
+ ),
+
+ label: $ => seq('\'', $.identifier),
+
+ break_expression: $ => prec.left(seq('break', optional($.label), optional($._expression))),
+
+ continue_expression: $ => prec.left(seq('continue', optional($.label))),
+
+ index_expression: $ => prec(PREC.call, seq($._expression, '[', $._expression, ']')),
+
+ await_expression: $ => prec(PREC.field, seq(
+ $._expression,
+ '.',
+ 'await',
+ )),
+
+ field_expression: $ => prec(PREC.field, seq(
+ field('value', $._expression),
+ '.',
+ field('field', choice(
+ $._field_identifier,
+ $.integer_literal,
+ )),
+ )),
+
+ unsafe_block: $ => seq(
+ 'unsafe',
+ $.block,
+ ),
+
+ async_block: $ => seq(
+ 'async',
+ optional('move'),
+ $.block,
+ ),
+
+ gen_block: $ => seq(
+ 'gen',
+ optional('move'),
+ $.block,
+ ),
+
+ try_block: $ => seq(
+ 'try',
+ $.block,
+ ),
+
+ block: $ => seq(
+ optional(seq($.label, ':')),
+ '{',
+ repeat($._statement),
+ optional($._expression),
+ '}',
+ ),
+
+ // Section - Patterns
+
+ _pattern: $ => choice(
+ $._literal_pattern,
+ alias(choice(...primitiveTypes), $.identifier),
+ $.identifier,
+ $.scoped_identifier,
+ $.generic_pattern,
+ $.tuple_pattern,
+ $.tuple_struct_pattern,
+ $.struct_pattern,
+ $._reserved_identifier,
+ $.ref_pattern,
+ $.slice_pattern,
+ $.captured_pattern,
+ $.reference_pattern,
+ $.remaining_field_pattern,
+ $.mut_pattern,
+ $.range_pattern,
+ $.or_pattern,
+ $.const_block,
+ $.macro_invocation,
+ '_',
+ ),
+
+ generic_pattern: $ => seq(
+ choice(
+ $.identifier,
+ $.scoped_identifier,
+ ),
+ '::',
+ field('type_arguments', $.type_arguments),
+ ),
+
+ tuple_pattern: $ => seq(
+ '(',
+ sepBy(',', choice($._pattern, $.closure_expression)),
+ optional(','),
+ ')',
+ ),
+
+ slice_pattern: $ => seq(
+ '[',
+ sepBy(',', $._pattern),
+ optional(','),
+ ']',
+ ),
+
+ tuple_struct_pattern: $ => seq(
+ field('type', choice(
+ $.identifier,
+ $.scoped_identifier,
+ alias($.generic_type_with_turbofish, $.generic_type),
+ )),
+ '(',
+ sepBy(',', $._pattern),
+ optional(','),
+ ')',
+ ),
+
+ struct_pattern: $ => seq(
+ field('type', choice(
+ $._type_identifier,
+ $.scoped_type_identifier,
+ )),
+ '{',
+ sepBy(',', choice($.field_pattern, $.remaining_field_pattern)),
+ optional(','),
+ '}',
+ ),
+
+ field_pattern: $ => seq(
+ optional('ref'),
+ optional($.mutable_specifier),
+ choice(
+ field('name', alias($.identifier, $.shorthand_field_identifier)),
+ seq(
+ field('name', $._field_identifier),
+ ':',
+ field('pattern', $._pattern),
+ ),
+ ),
+ ),
+
+ remaining_field_pattern: _ => '..',
+
+ mut_pattern: $ => prec(-1, seq(
+ $.mutable_specifier,
+ $._pattern,
+ )),
+
+ range_pattern: $ => choice(
+ seq(
+ field('left', choice(
+ $._literal_pattern,
+ $._path,
+ )),
+ choice(
+ seq(
+ choice('...', '..=', '..'),
+ field('right', choice(
+ $._literal_pattern,
+ $._path,
+ )),
+ ),
+ '..',
+ ),
+ ),
+ seq(
+ choice('..=', '..'),
+ field('right', choice(
+ $._literal_pattern,
+ $._path,
+ )),
+ ),
+ ),
+
+ ref_pattern: $ => seq(
+ 'ref',
+ $._pattern,
+ ),
+
+ captured_pattern: $ => seq(
+ $.identifier,
+ '@',
+ $._pattern,
+ ),
+
+ reference_pattern: $ => seq(
+ '&',
+ optional($.mutable_specifier),
+ $._pattern,
+ ),
+
+ or_pattern: $ => prec.left(-2, choice(
+ seq($._pattern, '|', $._pattern),
+ seq('|', $._pattern),
+ )),
+
+ // Section - Literals
+
+ _literal: $ => choice(
+ $.string_literal,
+ $.raw_string_literal,
+ $.char_literal,
+ $.boolean_literal,
+ $.integer_literal,
+ $.float_literal,
+ ),
+
+ _literal_pattern: $ => choice(
+ $.string_literal,
+ $.raw_string_literal,
+ $.char_literal,
+ $.boolean_literal,
+ $.integer_literal,
+ $.float_literal,
+ $.negative_literal,
+ ),
+
+ negative_literal: $ => seq('-', choice($.integer_literal, $.float_literal)),
+
+ integer_literal: _ => token(seq(
+ choice(
+ /[0-9][0-9_]*/,
+ /0x[0-9a-fA-F_]+/,
+ /0b[01_]+/,
+ /0o[0-7_]+/,
+ ),
+ optional(choice(...numericTypes)),
+ )),
+
+ string_literal: $ => seq(
+ alias(/[bc]?"/, '"'),
+ repeat(choice(
+ $.escape_sequence,
+ $.string_content,
+ )),
+ token.immediate('"'),
+ ),
+
+ raw_string_literal: $ => seq(
+ $._raw_string_literal_start,
+ alias($.raw_string_literal_content, $.string_content),
+ $._raw_string_literal_end,
+ ),
+
+ char_literal: _ => token(seq(
+ optional('b'),
+ '\'',
+ optional(choice(
+ seq('\\', choice(
+ /[^xu]/,
+ /u[0-9a-fA-F]{4}/,
+ /u\{[0-9a-fA-F]+\}/,
+ /x[0-9a-fA-F]{2}/,
+ )),
+ /[^\\']/,
+ )),
+ '\'',
+ )),
+
+ escape_sequence: _ => token.immediate(
+ seq('\\',
+ choice(
+ /[^xu]/,
+ /u[0-9a-fA-F]{4}/,
+ /u\{[0-9a-fA-F]+\}/,
+ /x[0-9a-fA-F]{2}/,
+ ),
+ )),
+
+ boolean_literal: _ => choice('true', 'false'),
+
+ comment: $ => choice(
+ $.line_comment,
+ $.block_comment,
+ ),
+
+ line_comment: $ => seq(
+ // All line comments start with two //
+ '//',
+ // Then are followed by:
+ // - 2 or more slashes making it a regular comment
+ // - 1 slash or 1 or more bang operators making it a doc comment
+ // - or just content for the comment
+ choice(
+ // A tricky edge case where what looks like a doc comment is not
+ seq(token.immediate(prec(2, /\/\//)), /.*/),
+ // A regular doc comment
+ seq($._line_doc_comment_marker, field('doc', alias($._line_doc_content, $.doc_comment))),
+ token.immediate(prec(1, /.*/)),
+ ),
+ ),
+
+ _line_doc_comment_marker: $ => choice(
+ // An outer line doc comment applies to the element that it is outside of
+ field('outer', alias($._outer_line_doc_comment_marker, $.outer_doc_comment_marker)),
+ // An inner line doc comment applies to the element it is inside of
+ field('inner', alias($._inner_line_doc_comment_marker, $.inner_doc_comment_marker)),
+ ),
+
+ _inner_line_doc_comment_marker: _ => token.immediate(prec(2, '!')),
+ _outer_line_doc_comment_marker: _ => token.immediate(prec(2, '/')),
+
+ block_comment: $ => seq(
+ '/*',
+ optional(
+ choice(
+ // Documentation block comments: /** docs */ or /*! docs */
+ seq(
+ $._block_doc_comment_marker,
+ optional(field('doc', alias($._block_comment_content, $.doc_comment))),
+ ),
+ // Non-doc block comments
+ $._block_comment_content,
+ ),
+ ),
+ '*/',
+ ),
+
+ _block_doc_comment_marker: $ => choice(
+ field('outer', alias($._outer_block_doc_comment_marker, $.outer_doc_comment_marker)),
+ field('inner', alias($._inner_block_doc_comment_marker, $.inner_doc_comment_marker)),
+ ),
+
+ _path: $ => choice(
+ $.self,
+ alias(choice(...primitiveTypes), $.identifier),
+ $.metavariable,
+ $.super,
+ $.crate,
+ $.identifier,
+ $.scoped_identifier,
+ $._reserved_identifier,
+ ),
+
+ identifier: _ => /(r#)?[_\p{XID_Start}][_\p{XID_Continue}]*/,
+
+ shebang: _ => /#![\r\f\t\v ]*([^\[\n].*)?\n/,
+
+ _reserved_identifier: $ => alias(choice(
+ 'default',
+ 'union',
+ 'gen',
+ ), $.identifier),
+
+ _type_identifier: $ => alias($.identifier, $.type_identifier),
+ _field_identifier: $ => alias($.identifier, $.field_identifier),
+
+ self: _ => 'self',
+ super: _ => 'super',
+ crate: _ => 'crate',
+
+ metavariable: _ => /\$[a-zA-Z_]\w*/,
+ },
+});
+
+/**
+ * Creates a rule to match one or more of the rules separated by the separator.
+ *
+ * @param {RuleOrLiteral} sep - The separator to use.
+ * @param {RuleOrLiteral} rule
+ *
+ * @returns {SeqRule}
+ */
+function sepBy1(sep, rule) {
+ return seq(rule, repeat(seq(sep, rule)));
+}
+
+
+/**
+ * Creates a rule to optionally match one or more of the rules separated by the separator.
+ *
+ * @param {RuleOrLiteral} sep - The separator to use.
+ * @param {RuleOrLiteral} rule
+ *
+ * @returns {ChoiceRule}
+ */
+function sepBy(sep, rule) {
+ return optional(sepBy1(sep, rule));
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/highlights.scm b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/highlights.scm
new file mode 100644
index 0000000000000..48c7284ec2b1c
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/highlights.scm
@@ -0,0 +1,161 @@
+; Identifiers
+
+(type_identifier) @type
+(primitive_type) @type.builtin
+(field_identifier) @property
+
+; Identifier conventions
+
+; Assume all-caps names (at least two characters, e.g. MAX_LEN) are constants
+((identifier) @constant
+ (#match? @constant "^[A-Z][A-Z\\d_]+$"))
+
+; Assume uppercase names are enum constructors
+((identifier) @constructor
+ (#match? @constructor "^[A-Z]"))
+
+; Assume that uppercase names in paths are types
+((scoped_identifier
+ path: (identifier) @type)
+ (#match? @type "^[A-Z]"))
+((scoped_identifier
+ path: (scoped_identifier
+ name: (identifier) @type))
+ (#match? @type "^[A-Z]"))
+((scoped_type_identifier
+ path: (identifier) @type)
+ (#match? @type "^[A-Z]"))
+((scoped_type_identifier
+ path: (scoped_identifier
+ name: (identifier) @type))
+ (#match? @type "^[A-Z]"))
+
+; Assume all qualified names in struct patterns are enum constructors. (They're
+; either that, or struct names; highlighting both as constructors seems to be
+; the less glaring choice of error, visually.)
+(struct_pattern
+ type: (scoped_type_identifier
+ name: (type_identifier) @constructor))
+
+; Function calls
+
+(call_expression
+ function: (identifier) @function)
+(call_expression
+ function: (field_expression
+ field: (field_identifier) @function.method))
+(call_expression
+ function: (scoped_identifier
+ "::"
+ name: (identifier) @function))
+
+(generic_function
+ function: (identifier) @function)
+(generic_function
+ function: (scoped_identifier
+ name: (identifier) @function))
+(generic_function
+ function: (field_expression
+ field: (field_identifier) @function.method))
+
+(macro_invocation
+ macro: (identifier) @function.macro
+ "!" @function.macro)
+
+; Function definitions
+
+(function_item (identifier) @function)
+(function_signature_item (identifier) @function)
+
+(line_comment) @comment
+(block_comment) @comment
+
+(line_comment (doc_comment)) @comment.documentation
+(block_comment (doc_comment)) @comment.documentation
+
+"(" @punctuation.bracket
+")" @punctuation.bracket
+"[" @punctuation.bracket
+"]" @punctuation.bracket
+"{" @punctuation.bracket
+"}" @punctuation.bracket
+
+(type_arguments
+ "<" @punctuation.bracket
+ ">" @punctuation.bracket)
+(type_parameters
+ "<" @punctuation.bracket
+ ">" @punctuation.bracket)
+
+"::" @punctuation.delimiter
+":" @punctuation.delimiter
+"." @punctuation.delimiter
+"," @punctuation.delimiter
+";" @punctuation.delimiter
+
+(parameter (identifier) @variable.parameter)
+
+(lifetime (identifier) @label)
+
+"as" @keyword
+"async" @keyword
+"await" @keyword
+"break" @keyword
+"const" @keyword
+"continue" @keyword
+"default" @keyword
+"dyn" @keyword
+"else" @keyword
+"enum" @keyword
+"extern" @keyword
+"fn" @keyword
+"for" @keyword
+"gen" @keyword
+"if" @keyword
+"impl" @keyword
+"in" @keyword
+"let" @keyword
+"loop" @keyword
+"macro_rules!" @keyword
+"match" @keyword
+"mod" @keyword
+"move" @keyword
+"pub" @keyword
+"raw" @keyword
+"ref" @keyword
+"return" @keyword
+"static" @keyword
+"struct" @keyword
+"trait" @keyword
+"type" @keyword
+"union" @keyword
+"unsafe" @keyword
+"use" @keyword
+"where" @keyword
+"while" @keyword
+"yield" @keyword
+(crate) @keyword
+(mutable_specifier) @keyword
+(use_list (self) @keyword)
+(scoped_use_list (self) @keyword)
+(scoped_identifier (self) @keyword)
+(super) @keyword
+
+(self) @variable.builtin
+
+(char_literal) @string
+(string_literal) @string
+(raw_string_literal) @string
+
+(boolean_literal) @constant.builtin
+(integer_literal) @constant.builtin
+(float_literal) @constant.builtin
+
+(escape_sequence) @escape
+
+(attribute_item) @attribute
+(inner_attribute_item) @attribute
+
+"*" @operator
+"&" @operator
+"'" @operator
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/scanner.c b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/scanner.c
new file mode 100644
index 0000000000000..269f6b2af61c4
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/scanner.c
@@ -0,0 +1,393 @@
+#include "tree_sitter/alloc.h"
+#include "tree_sitter/parser.h"
+
+#include <wctype.h>
+
+// Token kinds this scanner reports through lexer->result_symbol; the same
+// enumerators index the valid_symbols array passed to the scan function.
+// NOTE(review): presumably the order must mirror the grammar's `externals`
+// list, which is not visible in this file -- confirm before reordering.
+enum TokenType {
+ STRING_CONTENT,
+ RAW_STRING_LITERAL_START,
+ RAW_STRING_LITERAL_CONTENT,
+ RAW_STRING_LITERAL_END,
+ FLOAT_LITERAL,
+ BLOCK_OUTER_DOC_MARKER,
+ BLOCK_INNER_DOC_MARKER,
+ BLOCK_COMMENT_CONTENT,
+ LINE_DOC_CONTENT,
+ ERROR_SENTINEL // never emitted; the scan function bails out when it is marked valid
+};
+
+// Scanner state that persists between scan calls; it is saved and restored as
+// a single byte by the serialize/deserialize callbacks.
+typedef struct {
+ uint8_t opening_hash_count; // number of '#' in the raw-string opener scanned most recently
+} Scanner;
+
+// Allocates the zero-initialized Scanner that is handed to the other
+// callbacks as `payload`.
+void *tree_sitter_rust_external_scanner_create() {
+    void *state = ts_calloc(1, sizeof(Scanner));
+    return state;
+}
+
+// Releases the Scanner allocated by the create callback.
+void tree_sitter_rust_external_scanner_destroy(void *payload) {
+    Scanner *state = (Scanner *)payload;
+    ts_free(state);
+}
+
+// Saves the scanner state into `buffer`. The whole state is the opening hash
+// count, stored as one byte; the return value is the number of bytes written.
+unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
+    const Scanner *state = (const Scanner *)payload;
+    *buffer = (char)state->opening_hash_count;
+    return 1;
+}
+
+// Restores the scanner state written by the serialize callback. Any buffer
+// that is not exactly one byte long (including an empty one) resets the
+// opening hash count to zero.
+void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+    Scanner *scanner = (Scanner *)payload;
+    // The byte is reinterpreted as unsigned explicitly so counts above 127
+    // round-trip regardless of whether plain `char` is signed.
+    scanner->opening_hash_count = (length == 1) ? (uint8_t)buffer[0] : 0;
+}
+
+// True when `c` is '_' or a digit as classified by iswdigit, i.e. a character
+// that may appear inside the digit runs scanned by process_float_literal.
+static inline bool is_num_char(int32_t c) {
+    if (c == '_') {
+        return true;
+    }
+    return iswdigit(c) != 0;
+}
+
+// Moves the lexer past the current lookahead character, passing `false` for
+// the lexer's skip flag.
+static inline void advance(TSLexer *lexer) {
+    const bool skip_flag = false;
+    lexer->advance(lexer, skip_flag);
+}
+
+// Moves the lexer past the current lookahead character, passing `true` for
+// the lexer's skip flag (used by the scan function for leading whitespace).
+static inline void skip(TSLexer *lexer) {
+    const bool skip_flag = true;
+    lexer->advance(lexer, skip_flag);
+}
+
+// Scans a run of ordinary string characters, stopping in front of the next
+// '"' or '\\'. Returns true only if at least one character was consumed;
+// reaching end of input before a stop character returns false.
+static inline bool process_string(TSLexer *lexer) {
+    bool consumed_any = false;
+    while (lexer->lookahead != '\"' && lexer->lookahead != '\\') {
+        if (lexer->eof(lexer)) {
+            return false;
+        }
+        advance(lexer);
+        consumed_any = true;
+    }
+    lexer->result_symbol = STRING_CONTENT;
+    lexer->mark_end(lexer);
+    return consumed_any;
+}
+
+// Scans the opener of a raw string literal: an optional 'b' or 'c' prefix,
+// 'r', zero or more '#', and the opening '"'. On success the number of '#'
+// is stored in the scanner so the matching closer can be recognised later.
+//
+// Returns false when the input is not a raw string opener, or when the opener
+// has more '#' than the one-byte counter can represent.
+static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
+    if (lexer->lookahead == 'b' || lexer->lookahead == 'c') {
+        advance(lexer);
+    }
+    if (lexer->lookahead != 'r') {
+        return false;
+    }
+    advance(lexer);
+
+    uint8_t opening_hash_count = 0;
+    while (lexer->lookahead == '#') {
+        // The count lives in (and is serialized as) a single byte. Letting it
+        // wrap would record a delimiter shorter than the real one and end the
+        // literal at the wrong place, so reject over-long openers instead.
+        if (opening_hash_count == UINT8_MAX) {
+            return false;
+        }
+        advance(lexer);
+        opening_hash_count++;
+    }
+
+    if (lexer->lookahead != '"') {
+        return false;
+    }
+    advance(lexer);
+    scanner->opening_hash_count = opening_hash_count;
+
+    lexer->result_symbol = RAW_STRING_LITERAL_START;
+    return true;
+}
+
+// Scans the body of a raw string literal. The content ends just before the
+// first '"' that is followed by as many '#' as the opener had; the token end
+// is marked in front of that '"'. Returns false if the input ends first.
+static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
+    while (!lexer->eof(lexer)) {
+        if (lexer->lookahead != '"') {
+            advance(lexer);
+            continue;
+        }
+
+        // Candidate closer: remember where the content would end, then look
+        // ahead over the hashes that follow the quote.
+        lexer->mark_end(lexer);
+        advance(lexer);
+        unsigned seen_hashes = 0;
+        for (; seen_hashes < scanner->opening_hash_count && lexer->lookahead == '#'; seen_hashes++) {
+            advance(lexer);
+        }
+        if (seen_hashes == scanner->opening_hash_count) {
+            lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Consumes the closer of a raw string literal: one character for the '"'
+// (checked by the caller) followed by `opening_hash_count` further characters.
+static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
+    unsigned remaining = (unsigned)scanner->opening_hash_count + 1u;
+    while (remaining > 0) {
+        advance(lexer);
+        remaining--;
+    }
+    lexer->result_symbol = RAW_STRING_LITERAL_END;
+    return true;
+}
+
+// Tries to scan a float literal starting at the current lookahead, which the
+// scan function has already checked to be a digit. Returns true with
+// result_symbol == FLOAT_LITERAL when a float was recognised; the token ends
+// at the most recent mark_end call, so characters consumed after it are only
+// lookahead. Returns false when the digits turn out not to be a float.
+static inline bool process_float_literal(TSLexer *lexer) {
+ lexer->result_symbol = FLOAT_LITERAL;
+
+ // Integer part: the leading digit plus any further digits or '_'.
+ advance(lexer);
+ while (is_num_char(lexer->lookahead)) {
+ advance(lexer);
+ }
+
+ bool has_fraction = false, has_exponent = false;
+
+ if (lexer->lookahead == '.') {
+ has_fraction = true;
+ advance(lexer);
+ if (iswalpha(lexer->lookahead)) {
+ // The dot is followed by a letter: 1.max(2) => not a float but an integer
+ return false;
+ }
+
+ // A second '.' directly after the first (as in `1..2`) is not a fraction.
+ if (lexer->lookahead == '.') {
+ return false;
+ }
+ while (is_num_char(lexer->lookahead)) {
+ advance(lexer);
+ }
+ }
+
+ // Provisional token end: integer part plus optional fraction.
+ lexer->mark_end(lexer);
+
+ if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
+ has_exponent = true;
+ advance(lexer);
+ if (lexer->lookahead == '+' || lexer->lookahead == '-') {
+ advance(lexer);
+ }
+ // No digit after the exponent marker: accept the token as it stood at the
+ // previous mark_end, leaving the 'e' (and sign) outside of it.
+ // NOTE(review): this path returns true even when no fraction was seen
+ // (e.g. `1e` followed by a non-digit) -- confirm that is intended.
+ if (!is_num_char(lexer->lookahead)) {
+ return true;
+ }
+ advance(lexer);
+ while (is_num_char(lexer->lookahead)) {
+ advance(lexer);
+ }
+
+ lexer->mark_end(lexer);
+ }
+
+ // Plain digits with neither fraction nor exponent are not a float.
+ if (!has_exponent && !has_fraction) {
+ return false;
+ }
+
+ // Optional type suffix: one of 'u', 'i', 'f' followed by at least one digit.
+ if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') {
+ return true;
+ }
+ advance(lexer);
+ // A suffix letter without digits is left out of the token.
+ if (!iswdigit(lexer->lookahead)) {
+ return true;
+ }
+
+ while (iswdigit(lexer->lookahead)) {
+ advance(lexer);
+ }
+
+ lexer->mark_end(lexer);
+ return true;
+}
+
+// Scans the text of a line doc comment up to the end of the line or the end
+// of input, whichever comes first. Always succeeds.
+static inline bool process_line_doc_content(TSLexer *lexer) {
+    lexer->result_symbol = LINE_DOC_CONTENT;
+    while (!lexer->eof(lexer)) {
+        const bool at_newline = lexer->lookahead == '\n';
+        // The newline itself is consumed too: line endings are kept in the
+        // doc content node because they are useful for markdown injection.
+        advance(lexer);
+        if (at_newline) {
+            break;
+        }
+    }
+    return true;
+}
+
+// Where the scanner stands relative to a possible two-character comment
+// delimiter while walking through block comment content.
+typedef enum {
+    LeftForwardSlash, // previous character was '/': a '*' now opens a nested comment
+    LeftAsterisk,     // previous character was '*': a '/' now closes one comment level
+    Continuing,       // previous character cannot be the start of a delimiter
+} BlockCommentState;
+
+// Running state of the block comment walk: the delimiter state plus the
+// number of comment levels that are still open.
+typedef struct {
+    BlockCommentState state;
+    unsigned nestingDepth;
+} BlockCommentProcessing;
+
+// Handles `current` when the previous character was '/'.
+// Characters are passed as full code points (int32_t): narrowing them to
+// `char` would make unrelated non-ASCII characters whose low byte happens to
+// equal '*' or '/' look like comment delimiters.
+static inline void process_left_forward_slash(BlockCommentProcessing *processing, int32_t current) {
+    if (current == '*') {
+        processing->nestingDepth += 1;
+        processing->state = Continuing;
+        return;
+    }
+    // A second '/' can itself be the start of "/*" (as in "//*"), so stay in
+    // the slash state instead of forgetting it.
+    processing->state = current == '/' ? LeftForwardSlash : Continuing;
+}
+
+// Handles `current` when the previous character was '*'.
+static inline void process_left_asterisk(BlockCommentProcessing *processing, int32_t current, TSLexer *lexer) {
+    if (current == '*') {
+        // Another '*': this one may be the start of the closing "*/", so the
+        // content provisionally ends in front of it.
+        lexer->mark_end(lexer);
+        processing->state = LeftAsterisk;
+        return;
+    }
+
+    if (current == '/') {
+        processing->nestingDepth -= 1;
+    }
+
+    processing->state = Continuing;
+}
+
+// Handles `current` when the previous character was not part of a delimiter.
+static inline void process_continuing(BlockCommentProcessing *processing, int32_t current) {
+    switch (current) {
+        case '/':
+            processing->state = LeftForwardSlash;
+            break;
+        case '*':
+            processing->state = LeftAsterisk;
+            break;
+    }
+}
+
+// Scans what follows the opening "/*" of a block comment. Depending on which
+// symbols are valid it produces either a doc comment marker ('!' for inner,
+// a lone '*' for outer) or the comment content up to, but not including, the
+// "*/" that closes the outermost level. Nested comments are tracked by depth.
+// Returns false when there is nothing to produce (e.g. an empty comment).
+static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
+    int32_t first = lexer->lookahead;
+    // The first character is stored so we can safely advance inside
+    // these if blocks. However, because we only store one, we can only
+    // safely advance 1 time. Since there's a chance that an advance could
+    // happen in one state, we must advance in all states to ensure that
+    // the program ends up in a sane state prior to processing the block
+    // comment if need be.
+    if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') {
+        lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
+        advance(lexer);
+        return true;
+    }
+    if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') {
+        advance(lexer);
+        lexer->mark_end(lexer);
+        // If the next token is a / that means that it's an empty block comment.
+        if (lexer->lookahead == '/') {
+            return false;
+        }
+        // If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER
+        // as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more.
+        if (lexer->lookahead != '*') {
+            lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
+            return true;
+        }
+    } else {
+        advance(lexer);
+    }
+
+    if (valid_symbols[BLOCK_COMMENT_CONTENT]) {
+        BlockCommentProcessing processing = {Continuing, 1};
+        // Manually set the current state based on the first character
+        switch (first) {
+            case '*':
+                processing.state = LeftAsterisk;
+                if (lexer->lookahead == '/') {
+                    // This case can happen in an empty doc block comment
+                    // like /*!*/. The comment has no contents, so bail.
+                    return false;
+                }
+                break;
+            case '/':
+                processing.state = LeftForwardSlash;
+                break;
+            default:
+                processing.state = Continuing;
+                break;
+        }
+
+        // For the purposes of actually parsing rust code, this
+        // is incorrect as it considers an unterminated block comment
+        // to be an error. However, for the purposes of syntax highlighting
+        // this should be considered successful as otherwise you are not able
+        // to syntax highlight a block of code prior to closing the
+        // block comment
+        while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
+            // Set first to the current lookahead as that is the second character
+            // as we force an advance in the above code when we are checking if we
+            // need to handle a block comment inner or outer doc comment signifier
+            // node
+            first = lexer->lookahead;
+            switch (processing.state) {
+                case LeftForwardSlash:
+                    process_left_forward_slash(&processing, first);
+                    break;
+                case LeftAsterisk:
+                    process_left_asterisk(&processing, first, lexer);
+                    break;
+                case Continuing:
+                    lexer->mark_end(lexer);
+                    process_continuing(&processing, first);
+                    break;
+                default:
+                    break;
+            }
+            advance(lexer);
+            // A '/' that did not close the outermost level is part of the
+            // content, so extend the token past it.
+            if (first == '/' && processing.nestingDepth != 0) {
+                lexer->mark_end(lexer);
+            }
+        }
+        lexer->result_symbol = BLOCK_COMMENT_CONTENT;
+        return true;
+    }
+
+    return false;
+}
+
+// Entry point of the external scanner. Looks at which external tokens are
+// valid at the current position and delegates to the matching helper; returns
+// true when a token was recognised (its kind is in lexer->result_symbol).
+// The checks below are order-dependent: the first branch whose symbols are
+// valid decides the outcome.
+bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+ // The documentation states that if the lexical analysis fails for some reason
+ // they will mark every state as valid and pass it to the external scanner
+ // However, we can't do anything to help them recover in that case so we
+ // should just fail.
+ /*
+ link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
+ If a syntax error is encountered during regular parsing, Tree-sitter’s
+ first action during error recovery will be to call the external scanner’s
+ scan function with all tokens marked valid. The scanner should detect this
+ case and handle it appropriately. One simple method of detection is to add
+ an unused token to the end of the externals array, for example
+
+ externals: $ => [$.token1, $.token2, $.error_sentinel],
+
+ then check whether that token is marked valid to determine whether
+ Tree-sitter is in error correction mode.
+ */
+ if (valid_symbols[ERROR_SENTINEL]) {
+ return false;
+ }
+
+ Scanner *scanner = (Scanner *)payload;
+
+ // Comment and string content are scanned before the whitespace-skipping loop
+ // below, so leading whitespace stays part of those tokens.
+ if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] ||
+ valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
+ return process_block_comment(lexer, valid_symbols);
+ }
+
+ // String content is only attempted when a float literal is not also valid.
+ if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
+ return process_string(lexer);
+ }
+
+ if (valid_symbols[LINE_DOC_CONTENT]) {
+ return process_line_doc_content(lexer);
+ }
+
+ // The remaining tokens may be preceded by whitespace, which is skipped.
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+
+ if (valid_symbols[RAW_STRING_LITERAL_START] &&
+ (lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
+ return scan_raw_string_start(scanner, lexer);
+ }
+
+ if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
+ return scan_raw_string_content(scanner, lexer);
+ }
+
+ if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
+ return scan_raw_string_end(scanner, lexer);
+ }
+
+ if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
+ return process_float_literal(lexer);
+ }
+
+ return false;
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/tree-sitter.json b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/tree-sitter.json
new file mode 100644
index 0000000000000..76a39633212ab
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Rust/tree-sitter-rust/tree-sitter.json
@@ -0,0 +1,53 @@
+{
+ "grammars": [
+ {
+ "name": "rust",
+ "camelcase": "Rust",
+ "scope": "source.rust",
+ "path": ".",
+ "file-types": [
+ "rs"
+ ],
+ "highlights": [
+ "queries/highlights.scm"
+ ],
+ "injections": [
+ "queries/injections.scm"
+ ],
+ "tags": [
+ "queries/tags.scm"
+ ],
+ "injection-regex": "rust"
+ }
+ ],
+ "metadata": {
+ "version": "0.24.0",
+ "license": "MIT",
+ "description": "Rust grammar for tree-sitter",
+ "authors": [
+ {
+ "name": "Maxim Sokolov",
+ "email": "maxim0xff at gmail.com"
+ },
+ {
+ "name": "Max Brunsfeld",
+ "email": "maxbrunsfeld at gmail.com"
+ },
+ {
+ "name": "Amaan Qureshi",
+ "email": "amaanq12 at gmail.com"
+ }
+ ],
+ "links": {
+ "repository": "https://github.com/tree-sitter/tree-sitter-rust"
+ }
+ },
+ "bindings": {
+ "c": true,
+ "go": true,
+ "node": true,
+ "python": true,
+ "rust": true,
+ "swift": true
+ }
+}
diff --git a/lldb/unittests/Highlighter/CMakeLists.txt b/lldb/unittests/Highlighter/CMakeLists.txt
index a4fc679cc931f..827020f207620 100644
--- a/lldb/unittests/Highlighter/CMakeLists.txt
+++ b/lldb/unittests/Highlighter/CMakeLists.txt
@@ -1,5 +1,8 @@
if (LLDB_ENABLE_TREESITTER)
- set(SWIFT_HIGHLIGHTER_PLUGIN lldbPluginHighlighterTreeSitterSwift)
+ set(TREESITTER_HIGHLIGHTER_PLUGINS
+ lldbPluginHighlighterTreeSitterRust
+ lldbPluginHighlighterTreeSitterSwift
+ )
endif()
add_lldb_unittest(HighlighterTests
@@ -11,5 +14,5 @@ add_lldb_unittest(HighlighterTests
lldbPluginCPlusPlusLanguage
lldbPluginObjCLanguage
lldbPluginObjCPlusPlusLanguage
- ${SWIFT_HIGHLIGHTER_PLUGIN}
+ ${TREESITTER_HIGHLIGHTER_PLUGINS}
)
diff --git a/lldb/unittests/Highlighter/HighlighterTest.cpp b/lldb/unittests/Highlighter/HighlighterTest.cpp
index 9e9526ccd1167..cbf4e1dae2740 100644
--- a/lldb/unittests/Highlighter/HighlighterTest.cpp
+++ b/lldb/unittests/Highlighter/HighlighterTest.cpp
@@ -14,9 +14,11 @@
#include "Plugins/Language/ObjC/ObjCLanguage.h"
#include "Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h"
#include "lldb/Core/Highlighter.h"
+#include "lldb/Host/Config.h"
#include "lldb/Host/FileSystem.h"
#if LLDB_ENABLE_TREESITTER
+#include "Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h"
#include "Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h"
#endif
@@ -31,7 +33,7 @@ class HighlighterTest : public testing::Test {
// filename.
SubsystemRAII<FileSystem, ClangHighlighter,
#if LLDB_ENABLE_TREESITTER
- SwiftTreeSitterHighlighter,
+ SwiftTreeSitterHighlighter, RustTreeSitterHighlighter,
#endif
DefaultHighlighter, CPlusPlusLanguage, ObjCLanguage,
ObjCPlusPlusLanguage>
@@ -57,6 +59,10 @@ TEST_F(HighlighterTest, HighlighterSelectionType) {
EXPECT_EQ(getName(lldb::eLanguageTypeObjC), "clang");
EXPECT_EQ(getName(lldb::eLanguageTypeObjC_plus_plus), "clang");
+#if LLDB_ENABLE_TREESITTER
+ EXPECT_EQ(getName(lldb::eLanguageTypeRust), "tree-sitter-rust");
+#endif
+
EXPECT_EQ(getName(lldb::eLanguageTypeUnknown), "none");
EXPECT_EQ(getName(lldb::eLanguageTypeJulia), "none");
EXPECT_EQ(getName(lldb::eLanguageTypeHaskell), "none");
@@ -436,4 +442,124 @@ TEST_F(HighlighterTest, SwiftClosures) {
EXPECT_EQ(" <k>let</k> closure = { (x: <k>Int</k>) in return x * 2 }",
highlightSwift(" let closure = { (x: Int) in return x * 2 }", s));
}
+
+/// Highlights \p code as Rust ("main.rs"), forwarding \p style and \p cursor.
+static std::string
+highlightRust(llvm::StringRef code, HighlightStyle style,
+              std::optional<size_t> cursor = std::nullopt) {
+  HighlighterManager manager;
+  return manager.getHighlighterFor(lldb::eLanguageTypeRust, "main.rs")
+      .Highlight(style, code, cursor);
+}
+
+TEST_F(HighlighterTest, RustComments) {
+ HighlightStyle s; // Only the comment style gets markers in this test.
+ s.comment.Set("<cc>", "</cc>");
+ // Line, block (multi-line, nested) and doc comments: one <cc> span each.
+ EXPECT_EQ(" <cc>// I'm feeling lucky today</cc>",
+ highlightRust(" // I'm feeling lucky today", s));
+ EXPECT_EQ(" <cc>/* This is a\nmultiline comment */</cc>",
+ highlightRust(" /* This is a\nmultiline comment */", s));
+ EXPECT_EQ(" <cc>/* nested /* comment */ works */</cc>",
+ highlightRust(" /* nested /* comment */ works */", s));
+ EXPECT_EQ(" <cc>/// Documentation comment</cc>",
+ highlightRust(" /// Documentation comment", s));
+ EXPECT_EQ(" <cc>//! Inner doc comment</cc>",
+ highlightRust(" //! Inner doc comment", s));
+}
+
+TEST_F(HighlighterTest, RustKeywords) {
+ HighlightStyle s; // Only the keyword style gets markers in this test.
+ s.keyword.Set("<k>", "</k>");
+ // Names after struct/enum/impl/trait are expected in <k> too; u32/i32 are not.
+ EXPECT_EQ(" <k>let</k> x = 5;", highlightRust(" let x = 5;", s));
+ EXPECT_EQ(" <k>let</k> <k>mut</k> y = 10;",
+ highlightRust(" let mut y = 10;", s));
+ EXPECT_EQ(" <k>fn</k> foo() { <k>return</k> 42; }",
+ highlightRust(" fn foo() { return 42; }", s));
+ EXPECT_EQ(" <k>struct</k> <k>Point</k> {}",
+ highlightRust(" struct Point {}", s));
+ EXPECT_EQ(" <k>enum</k> <k>Color</k> {}", highlightRust(" enum Color {}", s));
+ EXPECT_EQ(" <k>impl</k> <k>MyStruct</k> {}",
+ highlightRust(" impl MyStruct {}", s));
+ EXPECT_EQ(" <k>trait</k> <k>MyTrait</k> {}",
+ highlightRust(" trait MyTrait {}", s));
+ EXPECT_EQ(" <k>if</k> x { }", highlightRust(" if x { }", s));
+ EXPECT_EQ(" <k>for</k> i <k>in</k> 0..10 { }",
+ highlightRust(" for i in 0..10 { }", s));
+ EXPECT_EQ(" <k>while</k> x { }", highlightRust(" while x { }", s));
+ EXPECT_EQ(" <k>match</k> x { _ => {} }",
+ highlightRust(" match x { _ => {} }", s));
+ EXPECT_EQ(" <k>pub</k> <k>fn</k> foo() {}",
+ highlightRust(" pub fn foo() {}", s));
+ EXPECT_EQ(" <k>const</k> MAX: u32 = 100;",
+ highlightRust(" const MAX: u32 = 100;", s));
+ EXPECT_EQ(" <k>static</k> GLOBAL: i32 = 0;",
+ highlightRust(" static GLOBAL: i32 = 0;", s));
+}
+
+TEST_F(HighlighterTest, RustStringLiterals) {
+ HighlightStyle s; // Only the string-literal style gets markers in this test.
+ s.string_literal.Set("<str>", "</str>");
+ // Plain, raw (r"", r#""#) and byte strings: wrapped whole, prefix included.
+ EXPECT_EQ(" let s = <str>\"Hello, World!\"</str>;",
+ highlightRust(" let s = \"Hello, World!\";", s));
+ EXPECT_EQ(" let raw = <str>r\"C:\\\\path\"</str>;",
+ highlightRust(" let raw = r\"C:\\\\path\";", s));
+ EXPECT_EQ(" let raw2 = <str>r#\"He said \"hi\"\"#</str>;",
+ highlightRust(" let raw2 = r#\"He said \"hi\"\"#;", s));
+ EXPECT_EQ(" let byte_str = <str>b\"bytes\"</str>;",
+ highlightRust(" let byte_str = b\"bytes\";", s));
+}
+
+TEST_F(HighlighterTest, RustScalarLiterals) {
+ HighlightStyle s; // Scalar markers are set, yet none are expected below.
+ s.scalar_literal.Set("<scalar>", "</scalar>");
+ // Every expectation equals its input. NOTE(review): confirm this is intended.
+ EXPECT_EQ(" let i = 42;", highlightRust(" let i = 42;", s));
+ EXPECT_EQ(" let hex = 0xFF;", highlightRust(" let hex = 0xFF;", s));
+ EXPECT_EQ(" let bin = 0b1010;", highlightRust(" let bin = 0b1010;", s));
+ EXPECT_EQ(" let oct = 0o77;", highlightRust(" let oct = 0o77;", s));
+ EXPECT_EQ(" let f = 3.14;", highlightRust(" let f = 3.14;", s));
+ EXPECT_EQ(" let typed = 42u32;", highlightRust(" let typed = 42u32;", s));
+ EXPECT_EQ(" let c = 'x';", highlightRust(" let c = 'x';", s));
+}
+
+TEST_F(HighlighterTest, RustIdentifiers) {
+ HighlightStyle s; // Only the identifier style gets markers in this test.
+ s.identifier.Set("<id>", "</id>");
+ // Only called/declared function names (bar, foo) are expected in <id>.
+ EXPECT_EQ(" let foo = <id>bar</id>();",
+ highlightRust(" let foo = bar();", s));
+ EXPECT_EQ(" my_variable = 10;", highlightRust(" my_variable = 10;", s));
+ EXPECT_EQ(" let x: i32 = 5", highlightRust(" let x: i32 = 5", s));
+ EXPECT_EQ(" fn <id>foo</id>() -> String { }",
+ highlightRust(" fn foo() -> String { }", s));
+ EXPECT_EQ(" fn <id>foo</id><'a>(x: &'a str) {}",
+ highlightRust(" fn foo<'a>(x: &'a str) {}", s));
+ EXPECT_EQ(" struct Foo<'a> { x: &'a i32 }",
+ highlightRust(" struct Foo<'a> { x: &'a i32 }", s));
+}
+
+TEST_F(HighlighterTest, RustOperators) {
+ HighlightStyle s; // Operators are expected wrapped in "[" and "]".
+ s.operators.Set("[", "]");
+ // Only `*` and `&` are expected marked. NOTE(review): confirm the rest.
+ EXPECT_EQ(" 1+2-3[*]4/5", highlightRust(" 1+2-3*4/5", s));
+ EXPECT_EQ(" x && y || z", highlightRust(" x && y || z", s));
+ EXPECT_EQ(" a == b != c", highlightRust(" a == b != c", s));
+ EXPECT_EQ(" x [&]y", highlightRust(" x &y", s));
+ EXPECT_EQ(" [*]ptr", highlightRust(" *ptr", s));
+}
+
+TEST_F(HighlighterTest, RustCursorPosition) {
+ HighlightStyle s; // Only the selected (cursor) style gets markers here.
+ s.selected.Set("<c>", "</c>");
+ // Cursor offsets 0..4 each expect exactly one character wrapped in <c>.
+ EXPECT_EQ("<c> </c>let x = 5;", highlightRust(" let x = 5;", s, 0));
+ EXPECT_EQ(" <c>l</c>et x = 5;", highlightRust(" let x = 5;", s, 1));
+ EXPECT_EQ(" l<c>e</c>t x = 5;", highlightRust(" let x = 5;", s, 2));
+ EXPECT_EQ(" le<c>t</c> x = 5;", highlightRust(" let x = 5;", s, 3));
+ EXPECT_EQ(" let<c> </c>x = 5;", highlightRust(" let x = 5;", s, 4));
+}
#endif
More information about the lldb-commits
mailing list