[llvm-branch-commits] [lldb] [lldb] Add tree-sitter based Swift syntax highlighting (PR #181297)
Jonas Devlieghere via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 13 12:50:36 PST 2026
https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/181297
>From 0d98d2e8c04aa3a1cf81f9dcd3a104ec8b4ca1be Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 12 Feb 2026 17:23:35 -0800
Subject: [PATCH 1/4] Address Alex' feedback
---
lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt | 1 +
.../Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index f85595d3b574c..af942ab39c569 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_lldb_library(lldbTreeSitter
LINK_COMPONENTS
Support
LINK_LIBS
+ lldbCore
lldbUtility
${TreeSitter_LIBRARY}
)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
index 38530400b2b1d..afafbaec92f71 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
-#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H
#include "lldb/Core/Highlighter.h"
#include "lldb/Utility/Stream.h"
>From 3819666fec679cfec19f8ac694514d02171067df Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Fri, 13 Feb 2026 10:57:17 -0800
Subject: [PATCH 2/4] Prefer shorter matches as suggested by Charles
---
.../Highlighter/TreeSitter/TreeSitterHighlighter.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
index 181dc90683597..a109471a313ca 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
@@ -196,8 +196,10 @@ void TreeSitterHighlighter::Highlight(const HighlightStyle &options,
[](const HLRange &a, const HLRange &b) {
if (a.start_byte != b.start_byte)
return a.start_byte < b.start_byte;
- // Prefer longer matches.
- return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte);
+ // Prefer shorter matches. For example, if we have an expression
+ // consisting of a variable and a property, we want to highlight
+ // them as individual components.
+ return (b.end_byte - b.start_byte) > (a.end_byte - a.start_byte);
});
uint32_t current_pos = 0;
>From e79276a78e3ae98df2b08a095688954a2c8b15e7 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Fri, 13 Feb 2026 11:28:40 -0800
Subject: [PATCH 3/4] Require tree-sitter cli binary
---
lldb/cmake/modules/FindTreeSitter.cmake | 6 +++++-
lldb/include/lldb/Host/Config.h.cmake | 2 ++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/lldb/cmake/modules/FindTreeSitter.cmake b/lldb/cmake/modules/FindTreeSitter.cmake
index 04a40507cc9ab..4e2d8958f2b87 100644
--- a/lldb/cmake/modules/FindTreeSitter.cmake
+++ b/lldb/cmake/modules/FindTreeSitter.cmake
@@ -8,11 +8,15 @@ find_path(TreeSitter_INCLUDE_DIR
find_library(TreeSitter_LIBRARY
NAMES tree-sitter treesitter)
+find_program(TreeSitter_BINARY
+ NAMES tree-sitter)
+
find_package_handle_standard_args(TreeSitter
- REQUIRED_VARS TreeSitter_LIBRARY TreeSitter_INCLUDE_DIR
+ REQUIRED_VARS TreeSitter_LIBRARY TreeSitter_INCLUDE_DIR TreeSitter_BINARY
)
mark_as_advanced(
TreeSitter_INCLUDE_DIR
TreeSitter_LIBRARY
+ TreeSitter_BINARY
)
diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake
index 79eaf88216450..31bd9a5ce738d 100644
--- a/lldb/include/lldb/Host/Config.h.cmake
+++ b/lldb/include/lldb/Host/Config.h.cmake
@@ -51,6 +51,8 @@
#cmakedefine01 LLDB_EMBED_PYTHON_HOME
+#cmakedefine01 LLDB_ENABLE_TREESITTER
+
#cmakedefine LLDB_PYTHON_HOME R"(${LLDB_PYTHON_HOME})"
#define LLDB_INSTALL_LIBDIR_BASENAME "${LLDB_INSTALL_LIBDIR_BASENAME}"
>From 0a041a6344ef09afe41f1b4082af4d1c2c60765b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Fri, 13 Feb 2026 12:47:15 -0800
Subject: [PATCH 4/4] [lldb] Add tree-sitter based Swift syntax highlighting
This adds tree-sitter based Swift syntax highlighting to LLDB. It
consists of the SwiftTreeSitterHighlighter plugin and the Swift grammar
from [1], which is licensed under MIT.
[1] https://github.com/alex-pinkus/tree-sitter-swift
Depends on:
* https://github.com/llvm/llvm-project/pull/181282
---
.../Highlighter/TreeSitter/CMakeLists.txt | 31 +
.../TreeSitter/Swift/CMakeLists.txt | 16 +
.../TreeSitter/Swift/HighlightQuery.h.in | 17 +
.../Swift/SwiftTreeSitterHighlighter.cpp | 43 +
.../Swift/SwiftTreeSitterHighlighter.h | 40 +
.../Swift/tree-sitter-swift/LICENSE | 21 +
.../Swift/tree-sitter-swift/grammar.js | 1594 +++++++++++++++++
.../Swift/tree-sitter-swift/highlights.scm | 336 ++++
.../Swift/tree-sitter-swift/scanner.c | 865 +++++++++
.../Swift/tree-sitter-swift/tree-sitter.json | 39 +
lldb/unittests/Highlighter/CMakeLists.txt | 5 +
.../unittests/Highlighter/HighlighterTest.cpp | 31 +-
12 files changed, 3036 insertions(+), 2 deletions(-)
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/CMakeLists.txt
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/HighlightQuery.h.in
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.cpp
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/LICENSE
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/grammar.js
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/highlights.scm
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/scanner.c
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/tree-sitter.json
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index af942ab39c569..4635095a0f3d4 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -8,3 +8,34 @@ add_lldb_library(lldbTreeSitter
lldbUtility
${TreeSitter_LIBRARY}
)
+
+function(add_tree_sitter_grammar name source_dir binary_dir)
+ set(TreeSitter_SOURCE_DIR ${source_dir}/${name})
+ set(TreeSitter_BINARY_DIR ${binary_dir}/src)
+
+ add_custom_command(
+ OUTPUT ${TreeSitter_BINARY_DIR}/parser.c
+ COMMAND ${TreeSitter_BINARY} generate -o ${TreeSitter_BINARY_DIR}
+ DEPENDS ${TreeSitter_SOURCE_DIR}/grammar.js
+ WORKING_DIRECTORY ${TreeSitter_SOURCE_DIR}
+ VERBATIM
+ )
+
+ add_lldb_library(${name}
+ ${TreeSitter_BINARY_DIR}/parser.c
+ ${TreeSitter_SOURCE_DIR}/scanner.c
+ )
+ set_property(TARGET ${name} PROPERTY C_STANDARD 11)
+ target_compile_options(${name} PRIVATE
+ "-Wno-everything"
+ )
+
+ file(READ ${TreeSitter_SOURCE_DIR}/highlights.scm HIGHLIGHT_QUERY)
+ configure_file(
+ "${source_dir}/HighlightQuery.h.in"
+ "${binary_dir}/HighlightQuery.h"
+ @ONLY
+ )
+endfunction()
+
+add_subdirectory(Swift)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/CMakeLists.txt
new file mode 100644
index 0000000000000..5ada2163e796e
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_lldb_library(lldbPluginHighlighterTreeSitterSwift PLUGIN
+ SwiftTreeSitterHighlighter.cpp
+
+ LINK_COMPONENTS
+ Support
+ LINK_LIBS
+ lldbCore
+ lldbUtility
+ lldbTreeSitter
+ tree-sitter-swift
+)
+
+add_tree_sitter_grammar(tree-sitter-swift
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/HighlightQuery.h.in b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/HighlightQuery.h.in
new file mode 100644
index 0000000000000..869dc16fa6086
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/HighlightQuery.h.in
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_HIGHLIGHTQUERY_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_HIGHLIGHTQUERY_H
+
+#include "llvm/ADT/StringRef.h"
+
+static constexpr llvm::StringLiteral highlight_query =
+ R"__(@HIGHLIGHT_QUERY@)__";
+
+#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_HIGHLIGHTQUERY_H
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.cpp
new file mode 100644
index 0000000000000..8fd7fffff2da5
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.cpp
@@ -0,0 +1,43 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SwiftTreeSitterHighlighter.h"
+#include "HighlightQuery.h"
+#include "lldb/Target/Language.h"
+
+LLDB_PLUGIN_DEFINE_ADV(SwiftTreeSitterHighlighter, HighlighterTreeSitterSwift)
+
+extern "C" {
+const TSLanguage *tree_sitter_swift();
+}
+
+using namespace lldb_private;
+
+const TSLanguage *SwiftTreeSitterHighlighter::GetLanguage() const {
+ return tree_sitter_swift();
+}
+
+llvm::StringRef SwiftTreeSitterHighlighter::GetHighlightQuery() const {
+ return highlight_query;
+}
+
+Highlighter *
+SwiftTreeSitterHighlighter::CreateInstance(lldb::LanguageType language) {
+ if (language == lldb::eLanguageTypeSwift)
+ return new SwiftTreeSitterHighlighter();
+ return nullptr;
+}
+
+void SwiftTreeSitterHighlighter::Initialize() {
+ PluginManager::RegisterPlugin(GetPluginNameStatic(), GetPluginNameStatic(),
+ CreateInstance);
+}
+
+void SwiftTreeSitterHighlighter::Terminate() {
+ PluginManager::UnregisterPlugin(CreateInstance);
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h
new file mode 100644
index 0000000000000..215916e20b2f5
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_SWIFTTREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_SWIFTTREESITTERHIGHLIGHTER_H
+
+#include "../TreeSitterHighlighter.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace lldb_private {
+
+class SwiftTreeSitterHighlighter : public TreeSitterHighlighter {
+public:
+ SwiftTreeSitterHighlighter() = default;
+ ~SwiftTreeSitterHighlighter() override = default;
+
+ llvm::StringRef GetName() const override { return "tree-sitter-swift"; }
+
+ static Highlighter *CreateInstance(lldb::LanguageType language);
+
+ static void Terminate();
+ static void Initialize();
+
+ static llvm::StringRef GetPluginNameStatic() {
+ return "Tree-sitter Swift Highlighter";
+ }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+protected:
+ const TSLanguage *GetLanguage() const override;
+ llvm::StringRef GetHighlightQuery() const override;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_SWIFT_SWIFTTREESITTERHIGHLIGHTER_H
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/LICENSE b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/LICENSE
new file mode 100644
index 0000000000000..f158d70053113
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 alex-pinkus
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/grammar.js b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/grammar.js
new file mode 100644
index 0000000000000..18c03b766d825
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/grammar.js
@@ -0,0 +1,1594 @@
+"use strict";
+/*
+ * MIT License
+ *
+ * Copyright (c) 2021 alex-pinkus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+const PRECS = {
+ multiplication: 11,
+ addition: 10,
+ infix_operations: 9,
+ nil_coalescing: 8,
+ check: 7,
+ prefix_operations: 7,
+ comparison: 6,
+ postfix_operations: 6,
+ equality: 5,
+ conjunction: 4,
+ disjunction: 3,
+ block: 2,
+ loop: 1,
+ keypath: 1,
+ parameter_pack: 1,
+ control_transfer: 0,
+ as: -1,
+ tuple: -1,
+ if: -1,
+ switch: -1,
+ do: -1,
+ fully_open_range: -1,
+ range: -1,
+ navigation: -1,
+ expr: -1,
+ ty: -1,
+ call: -2,
+ ternary: -2,
+ try: -2,
+ call_suffix: -2,
+ range_suffix: -2,
+ ternary_binary_suffix: -2,
+ await: -2,
+ assignment: -3,
+ comment: -3,
+ lambda: -3,
+ regex: -4,
+};
+
+const DYNAMIC_PRECS = {
+ call : 1,
+};
+
+const DEC_DIGITS = token(sep1(/[0-9]+/, /_+/));
+const HEX_DIGITS = token(sep1(/[0-9a-fA-F]+/, /_+/));
+const OCT_DIGITS = token(sep1(/[0-7]+/, /_+/));
+const BIN_DIGITS = token(sep1(/[01]+/, /_+/));
+const REAL_EXPONENT = token(seq(/[eE]/, optional(/[+-]/), DEC_DIGITS));
+const HEX_REAL_EXPONENT = token(seq(/[pP]/, optional(/[+-]/), DEC_DIGITS));
+
+var LEXICAL_IDENTIFIER;
+
+if (tree_sitter_version_supports_emoji()) {
+ LEXICAL_IDENTIFIER =
+ /[_\p{XID_Start}\p{Emoji}&&[^0-9#*]](\p{EMod}|\x{FE0F}\x{20E3}?)?([_\p{XID_Continue}\p{Emoji}\x{200D}](\p{EMod}|\x{FE0F}\x{20E3}?)?)*/;
+} else {
+ LEXICAL_IDENTIFIER = /[_\p{XID_Start}][_\p{XID_Continue}]*/;
+}
+
+module.exports = grammar({
+ name : "swift",
+ conflicts : ($) => [
+ // @Type(... could either be an annotation constructor
+ // invocation or an annotated expression
+ [ $.attribute ],
+ [ $._attribute_argument ],
+ // Is `foo { ... }` a constructor invocation or function
+ // invocation?
+ [ $._simple_user_type, $._expression ],
+ // To support nested types A.B not being interpreted as
+ // `(navigation_expression ... (type_identifier))
+ // (navigation_suffix)`
+ [ $.user_type ],
+ // How to tell the difference between Foo.bar(with:and:), and
+ // Foo.bar(with: smth, and: other)? You need GLR
+ [ $.value_argument ],
+ // { (foo, bar) ...
+ [ $._expression, $.lambda_parameter ],
+ [ $._primary_expression, $.lambda_parameter ],
+ // (start: start, end: end)
+ [ $._tuple_type_item_identifier, $.tuple_expression ],
+ // After a `{` in a function or switch context, it's ambiguous
+ // whether we're starting a set of local statements or applying
+ // some modifiers to a capture or pattern.
+ [ $.modifiers ],
+ // `+(...)` is ambiguously either "call the function produced by
+ // a reference to the operator `+`" or "use the unary operator
+ // `+` on the result of the parenthetical expression."
+ [ $._additive_operator, $._prefix_unary_operator ],
+ [ $._referenceable_operator, $._prefix_unary_operator ],
+ // `{ [self, b, c] ...` could be a capture list or an array
+ // literal depending on what else happens.
+ [ $.capture_list_item, $._expression ],
+ [ $.capture_list_item, $._expression, $._simple_user_type ],
+ [ $._primary_expression, $.capture_list_item ],
+ // a ? b : c () could be calling c(), or it could be calling a
+ // function that's produced by the result of
+ // `(a ? b : c)`. We have a small hack to force it to be the
+ // former of these by intentionally introducing a conflict.
+ [ $.call_suffix, $.expr_hack_at_ternary_binary_call_suffix ],
+ // try {expression} is a bit magic and applies quite broadly:
+ // `try foo()` and `try foo { }` show that this is right
+ // associative, and `try foo ? bar() : baz` even more so. But it
+ // doesn't always win: something like `if try foo { } ...`
+ // should award its braces to the `if`. In order to make this
+ // actually happen, we need to parse all the options and pick
+ // the best one that doesn't error out.
+ [ $.try_expression, $._unary_expression ],
+ [ $.try_expression, $._expression ],
+ // await {expression} has the same special cases as `try`.
+ [ $.await_expression, $._unary_expression ],
+ [ $.await_expression, $._expression ],
+ // In a computed property, when you see an @attribute, it's not
+ // yet clear if that's going to be for a locally-declared class
+ // or a getter / setter specifier.
+ [
+ $._local_property_declaration,
+ $._local_typealias_declaration,
+ $._local_function_declaration,
+ $._local_class_declaration,
+ $.computed_getter,
+ $.computed_modify,
+ $.computed_setter,
+ ],
+ // The `class` modifier is legal in many of the same positions
+ // that a class declaration itself would be.
+ [ $._bodyless_function_declaration, $.property_modifier ],
+ [ $.init_declaration, $.property_modifier ],
+ // Patterns, man
+ [ $._navigable_type_expression, $._case_pattern ],
+ [
+ $._no_expr_pattern_already_bound, $._binding_pattern_no_expr
+ ],
+
+ // On encountering a closure starting with `{ @Foo ...`, we
+ // don't yet know if that attribute applies to the closure type
+ // or to a declaration within the closure. What a mess! We just
+ // have to hope that if we keep going, only one of those will
+ // parse (because there will be an `in` or a `let`).
+ [
+ $._lambda_type_declaration,
+ $._local_property_declaration,
+ $._local_typealias_declaration,
+ $._local_function_declaration,
+ $._local_class_declaration,
+ ],
+
+ // We want `foo() { }` to be treated as one function call, but
+ // we _also_ want `if foo() { ... }` to be treated as a full
+ // if-statement. This means we have to treat it as a conflict
+ // rather than purely a left or right associative construct, and
+ // let the parser realize that the second expression won't parse
+ // properly with the `{ ... }` as a lambda.
+ [ $.constructor_suffix ],
+ [ $.call_suffix ],
+
+ // `actor` is allowed to be an identifier, even though it is
+ // also a locally permitted declaration. If we encounter it, the
+ // only way to know what it's meant to be is to keep going.
+ [ $._modifierless_class_declaration, $.property_modifier ],
+ [ $._fn_call_lambda_arguments ],
+
+ // `borrowing` and `consuming` are legal as identifiers, but are
+ // also legal modifiers
+ [ $.parameter_modifiers ],
+
+ // These are keywords sometimes, but simple identifiers other
+ // times, and it just depends on the rest of their usage.
+ [
+ $._contextual_simple_identifier,
+ $._modifierless_class_declaration
+ ],
+ [
+ $._contextual_simple_identifier, $.property_behavior_modifier
+ ],
+ [ $._contextual_simple_identifier, $.parameter_modifier ],
+ [ $._contextual_simple_identifier, $.type_parameter_pack ],
+ [ $._contextual_simple_identifier, $.type_pack_expansion ],
+ [ $._contextual_simple_identifier, $.visibility_modifier ],
+],
+ extras : ($) => [$.comment,
+ $.multiline_comment,
+ /\s+/, // Whitespace
+],
+ externals : ($) => [
+ // Comments and raw strings are parsed in a custom scanner
+ // because they require us to carry forward state to maintain
+ // symmetry. For instance, parsing a multiline comment requires
+ // us to increment a counter whenever we see
+ // `/*`, and decrement it whenever we see `*/`. A standard
+ // grammar would only be able to exit the comment at the first
+ // `*/` (like C does). Similarly, when you start a string with
+ // `##"`, you're required to include the same number of `#`
+ // symbols to end it.
+ $.multiline_comment,
+ $.raw_str_part,
+ $.raw_str_continuing_indicator,
+ $.raw_str_end_part,
+ // Because Swift doesn't have explicit semicolons, we also do
+ // some whitespace handling in a custom scanner. Line breaks are
+ // _sometimes_ meaningful as the end of a statement: try to
+ // write `let foo: Foo let bar: Bar`, for instance, and the
+ // compiler will complain, but add either a newline or a
+ // semicolon and it's fine. We borrow the idea from the Kotlin
+ // grammar that a newline is sometimes a "semicolon". By
+ // including `\n` in both `_semi` and an anonymous `whitespace`
+ // extras, we _should_ be able to let the parser decide if a
+ // newline is meaningful. If the parser sees something like
+ // `foo.bar(1\n)`, it knows that a "semicolon" would not be
+ // valid there, so it parses that as whitespace. On the other
+ // hand, `let foo: Foo\n let bar: Bar` has a meaningful newline.
+ // Unfortunately, we can't simply stop at that. There are some
+ // expressions and statements that remain valid if you end them
+ // early, but are expected to be parsed across multiple lines.
+ // One particularly nefarious example is a function declaration,
+ // where you might have something like `func foo<A>(args: A) ->
+ // Foo throws where A: Hashable`. This would still be a valid
+ // declaration even if it ended after the `)`, the `Foo`, or the
+ // `throws`, so a grammar that simply interprets a newline as
+ // "sometimes a semi" would parse those incorrectly. To solve
+ // that case, our custom scanner must do a bit of extra
+ // lookahead itself. If we're about to generate a
+ // `_semi`, we advance a bit further to see if the next
+ // non-whitespace token would be one of these other operators.
+ // If so, we ignore the `_semi` and just produce the operator;
+ // if not, we produce the `_semi` and let the rest of the
+ // grammar sort it out. This isn't perfect, but it works well
+ // enough most of the time.
+ $._implicit_semi,
+ $._explicit_semi,
+ // Every one of the below operators will suppress a `_semi` if
+ // we encounter it after a newline.
+ $._arrow_operator_custom,
+ $._dot_custom,
+ $._conjunction_operator_custom,
+ $._disjunction_operator_custom,
+ $._nil_coalescing_operator_custom,
+ $._eq_custom,
+ $._eq_eq_custom,
+ $._plus_then_ws,
+ $._minus_then_ws,
+ $._bang_custom,
+ $._throws_keyword,
+ $._rethrows_keyword,
+ $.default_keyword,
+ $.where_keyword,
+ $["else"],
+ $.catch_keyword,
+ $._as_custom,
+ $._as_quest_custom,
+ $._as_bang_custom,
+ $._async_keyword_custom,
+ $._custom_operator,
+ $._hash_symbol_custom,
+ $._directive_if,
+ $._directive_elseif,
+ $._directive_else,
+ $._directive_endif,
+
+ // Fake operator that will never get triggered, but follows the
+ // sequence of characters for `try!`. Tracked by the custom
+ // scanner so that it can avoid triggering `$.bang` for that
+ // case.
+ $._fake_try_bang,
+],
+ inline : ($) => [$._locally_permitted_modifiers],
+ rules : {
+ ////////////////////////////////
+ // File Structure
+ ////////////////////////////////
+ source_file : ($) => seq(
+ optional($.shebang_line),
+ optional(seq($._top_level_statement,
+ repeat(seq($._semi, $._top_level_statement)),
+ optional($._semi)))),
+ _semi : ($) => choice($._implicit_semi, $._explicit_semi),
+ shebang_line : ($) => seq($._hash_symbol, "!", /[^\r\n]*/),
+ ////////////////////////////////
+ // Lexical Structure -
+ // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html
+ ////////////////////////////////
+ comment : ($) => token(prec(PRECS.comment, seq("//", /.*/))),
+ // Identifiers
+ simple_identifier : ($) =>
+ choice(LEXICAL_IDENTIFIER, /`[^\r\n` ]*`/, /\$[0-9]+/,
+ token(seq("$", LEXICAL_IDENTIFIER)),
+ $._contextual_simple_identifier),
+ // Keywords that were added after they were already legal as identifiers.
+ // `tree-sitter` will prefer exact matches
+ // when parsing so unless we explicitly say that these are legal, the parser
+ // will interpret them as their keyword.
+ _contextual_simple_identifier : ($) =>
+ choice("actor", "async", "each", "lazy",
+ "repeat", "package",
+ $._parameter_ownership_modifier),
+ identifier : ($) => sep1($.simple_identifier, $._dot),
+ // Literals
+ _basic_literal : ($) =>
+ choice($.integer_literal, $.hex_literal, $.oct_literal,
+ $.bin_literal, $.real_literal, $.boolean_literal,
+ $._string_literal, $.regex_literal, "nil"),
+ real_literal : ($) => token(choice(seq(DEC_DIGITS, REAL_EXPONENT),
+ seq(optional(DEC_DIGITS), ".",
+ DEC_DIGITS, optional(REAL_EXPONENT)),
+ seq("0x", HEX_DIGITS,
+ optional(seq(".", HEX_DIGITS)),
+ HEX_REAL_EXPONENT))),
+ integer_literal : ($) => token(seq(optional(/[1-9]/), DEC_DIGITS)),
+ hex_literal : ($) => token(seq("0", /[xX]/, HEX_DIGITS)),
+ oct_literal : ($) => token(seq("0", /[oO]/, OCT_DIGITS)),
+ bin_literal : ($) => token(seq("0", /[bB]/, BIN_DIGITS)),
+ boolean_literal : ($) => choice("true", "false"),
+ // String literals
+ _string_literal : ($) => choice($.line_string_literal,
+ $.multi_line_string_literal,
+ $.raw_string_literal),
+ line_string_literal : ($) => seq(
+ '"',
+ repeat(choice(field("text", $._line_string_content),
+ $._interpolation)),
+ '"'),
+ _line_string_content : ($) => choice($.line_str_text, $.str_escaped_char),
+ line_str_text : ($) => /[^\\"]+/,
+ str_escaped_char : ($) => choice($._escaped_identifier,
+ $._uni_character_literal),
+ _uni_character_literal : ($) => seq("\\", "u", /\{[0-9a-fA-F]+\}/),
+ multi_line_string_literal : ($) =>
+ seq('"""',
+ repeat(choice(
+ field("text",
+ $._multi_line_string_content),
+ $._interpolation)),
+ '"""'),
+ raw_string_literal : ($) => seq(
+ repeat(seq(
+ field("text", $.raw_str_part),
+ field("interpolation", $.raw_str_interpolation),
+ optional($.raw_str_continuing_indicator))),
+ field("text", $.raw_str_end_part)),
+ raw_str_interpolation : ($) => seq($.raw_str_interpolation_start,
+ $._interpolation_contents, ")"),
+ raw_str_interpolation_start : ($) => /\\#*\(/,
+ _multi_line_string_content : ($) => choice($.multi_line_str_text,
+ $.str_escaped_char, '"'),
+ _interpolation : ($) => seq("\\(", $._interpolation_contents, ")"),
+ _interpolation_contents : ($) =>
+ sep1Opt(field("interpolation",
+ alias($.value_argument,
+ $.interpolated_expression)),
+ ","),
+ _escaped_identifier : ($) => /\\[0\\tnr"'\n]/,
+ multi_line_str_text : ($) => /[^\\"]+/,
+ // Based on
+ // https://gitlab.com/woolsweater/tree-sitter-swifter/-/blob/3d47c85bd47ce54cdf2023a9c0e01eb90adfcc1d/grammar.js#L1019
+ // But required modifications to hit all of the cases in SE-354
+ regex_literal : ($) => choice($._extended_regex_literal,
+ $._multiline_regex_literal,
+ $._oneline_regex_literal),
+
+ _extended_regex_literal : ($) =>
+ seq($._hash_symbol, /\/((\/[^#])|[^\n])+\/#/),
+
+ _multiline_regex_literal : ($) => seq($._hash_symbol, /\/\n/,
+ /(\/[^#]|[^/])*?\n\/#/),
+
+ _oneline_regex_literal : ($) => token(prec(
+ PRECS.regex,
+ seq("/",
+ token.immediate(/[^ \t\n]?[^/\n]*[^ \t\n/]/),
+ token.immediate("/")))),
+ ////////////////////////////////
+ // Types - https://docs.swift.org/swift-book/ReferenceManual/Types.html
+ ////////////////////////////////
+ type_annotation : ($) =>
+ seq(":", field("type",
+ $._possibly_implicitly_unwrapped_type)),
+ _possibly_implicitly_unwrapped_type : ($) =>
+ seq($._type,
+ optional(token.immediate("!"))),
+ _type : ($) =>
+ prec.right(PRECS.ty, seq(optional($.type_modifiers),
+ field("name", $._unannotated_type))),
+ _unannotated_type : ($) => prec.right(
+ PRECS.ty,
+ choice($.user_type, $.tuple_type, $.function_type,
+ $.array_type, $.dictionary_type,
+ $.optional_type, $.metatype, $.opaque_type,
+ $.existential_type,
+ $.protocol_composition_type,
+ $.type_parameter_pack, $.type_pack_expansion,
+ $.suppressed_constraint)),
+ // The grammar just calls this whole thing a `type-identifier` but that's a
+ // bit confusing.
+ user_type : ($) => sep1($._simple_user_type, $._dot),
+ _simple_user_type : ($) => prec.right(
+ PRECS.ty,
+ seq(alias($.simple_identifier, $.type_identifier),
+ optional($.type_arguments))),
+ tuple_type : ($) =>
+ choice(seq("(",
+ optional(sep1Opt(
+ field("element", $.tuple_type_item), ",")),
+ ")"),
+ alias($._parenthesized_type, $.tuple_type_item)),
+ tuple_type_item : ($) => prec(PRECS.expr,
+ seq(optional($._tuple_type_item_identifier),
+ optional($.parameter_modifiers),
+ field("type", $._type))),
+ _tuple_type_item_identifier : ($) =>
+ prec(PRECS.expr,
+ seq(optional($.wildcard_pattern),
+ field("name", $.simple_identifier),
+ ":")),
+ function_type : ($) =>
+ seq(field("params",
+ choice($.tuple_type, $._unannotated_type)),
+ optional($._async_keyword), optional($.throws),
+ $._arrow_operator, field("return_type", $._type)),
+ array_type : ($) => seq("[", field("element", $._type), "]"),
+ dictionary_type : ($) => seq("[", field("key", $._type), ":",
+ field("value", $._type), "]"),
+ optional_type : ($) => prec.left(seq( // a wrapped type followed by one or more `?`
+ field("wrapped", choice($.user_type, $.tuple_type,
+ $.array_type, $.dictionary_type)),
+ repeat1(alias($._immediate_quest, "?")))), // each `?` uses the immediate-token variant, shown as "?"
+ metatype : ($) => seq($._unannotated_type, ".", choice("Type", "Protocol")), // type `.Type` or type `.Protocol`
+ _quest : ($) => "?", // ordinary `?` token
+ _immediate_quest : ($) => token.immediate("?"), // `?` as an immediate token
+ opaque_type : ($) => prec.right(seq("some", $._unannotated_type)), // `some` + type
+ existential_type : ($) => prec.right(seq("any", $._unannotated_type)), // `any` + type
+ type_parameter_pack : ($) => prec.left(seq("each", $._unannotated_type)), // `each` + type
+ type_pack_expansion : ($) => prec.left(seq("repeat", $._unannotated_type)), // `repeat` + type
+ protocol_composition_type : ($) => prec.left(seq( // type `&` type [`&` type ...]
+ $._unannotated_type,
+ repeat1(seq( // at least one `& type` tail is required
+ "&", prec.right($._unannotated_type))))),
+ suppressed_constraint : ($) => prec.right( // `~` followed by an identifier
+ seq("~", field("suppressed",
+ alias($.simple_identifier, // the identifier is re-labelled as `type_identifier`
+ $.type_identifier)))),
+ ////////////////////////////////
+ // Expressions -
+ // https://docs.swift.org/swift-book/ReferenceManual/Expressions.html
+ ////////////////////////////////
+ _expression : ($) => // choice over all expression forms, at precedence PRECS.expr
+ prec(PRECS.expr,
+ choice($.simple_identifier, $._unary_expression,
+ $._binary_expression, $.ternary_expression,
+ $._primary_expression, $.if_statement, // `if_statement` and `switch_statement` are accepted as expressions
+ $.switch_statement, $.assignment,
+ $.value_parameter_pack, $.value_pack_expansion,
+ seq($._expression, // an expression followed by an immediate `?`
+ alias($._immediate_quest, "?")))),
+ // Unary expressions
+ _unary_expression : ($) => // choice over the unary-style expression rules, plus `directive` and `diagnostic`
+ choice($.postfix_expression, $.call_expression,
+ $.macro_invocation, $.constructor_expression,
+ $.navigation_expression, $.prefix_expression,
+ $.as_expression, $.selector_expression,
+ $.open_start_range_expression,
+ $.open_end_range_expression, $.directive,
+ $.diagnostic),
+ postfix_expression : ($) => prec.left( // `target` followed by a postfix operator in field `operation`
+ PRECS.postfix_operations,
+ seq(field("target", $._expression),
+ field("operation", $._postfix_unary_operator))),
+ constructor_expression : ($) => prec( // a type followed by a `constructor_suffix`
+ PRECS.call,
+ seq(field("constructed_type",
+ choice($.array_type, $.dictionary_type, // only these three type forms can be constructed here
+ $.user_type)),
+ $.constructor_suffix)),
+ _parenthesized_type : ($) => // `(` type `)` for a restricted set of type forms
+ seq("(",
+ field("element",
+ choice($.opaque_type, $.existential_type,
+ $.dictionary_type)),
+ ")"),
+ navigation_expression : ($) => prec.left( // `target` followed by a `navigation_suffix`; left-associative
+ PRECS.navigation,
+ seq(field("target",
+ choice($._navigable_type_expression, // the target may be a type, an expression, or a parenthesized type
+ $._expression,
+ $._parenthesized_type)),
+ field("suffix", $.navigation_suffix))),
+ _navigable_type_expression : ($) => choice($.user_type, $.array_type, // type forms usable as a navigation target
+ $.dictionary_type),
+ open_start_range_expression : ($) => prec.right( // range operator followed by an `end` expression, with no start
+ PRECS.range,
+ seq($._range_operator,
+ prec.right(
+ PRECS.range_suffix, // the end operand carries its own precedence level
+ field("end", $._expression)))),
+ _range_operator : ($) => choice($._open_ended_range_operator, // either of the two range operator rules
+ $._three_dot_operator),
+ open_end_range_expression : ($) => // `start` expression followed by the three-dot operator, with no end
+ prec.right(PRECS.range,
+ seq(field("start", $._expression),
+ $._three_dot_operator)),
+ prefix_expression : ($) => prec.left( // prefix operator (`operation`) applied to `target`
+ PRECS.prefix_operations,
+ seq(field("operation", $._prefix_unary_operator),
+ field("target", choice($._expression,
+ alias(choice("async", "if", // these bare keywords are also accepted as the target,
+ "switch"), // aliased to `_expression`
+ $._expression))))),
+ as_expression : ($) => prec.left(PRECS.as, seq(field("expr", $._expression), // `expr`, an `as_operator`, then `type`
+ $.as_operator,
+ field("type", $._type))),
+ selector_expression : ($) => seq($._hash_symbol, "selector", "(", // `#selector(` ... `)`
+ optional(choice("getter:", "setter:")), // optional accessor prefix
+ $._expression, ")"),
+ // Binary expressions
+ _binary_expression : ($) => choice( // choice over every two-operand expression rule
+ $.multiplicative_expression, $.additive_expression,
+ $.range_expression, $.infix_expression,
+ $.nil_coalescing_expression, $.check_expression,
+ $.equality_expression, $.comparison_expression,
+ $.conjunction_expression, $.disjunction_expression,
+ $.bitwise_operation),
+ multiplicative_expression : ($) => prec.left( // `lhs` `op` `rhs`, left-associative at PRECS.multiplication
+ PRECS.multiplication,
+ seq(field("lhs", $._expression),
+ field("op", $._multiplicative_operator),
+ field("rhs", $._expression))), // plain `_expression` on the right-hand side
+ additive_expression : ($) => // `lhs` `op` `rhs`, left-associative at PRECS.addition
+ prec.left(PRECS.addition,
+ seq(field("lhs", $._expression),
+ field("op", $._additive_operator),
+ field("rhs", $._expression))), // plain `_expression` on the right-hand side
+ range_expression : ($) => prec.right( // `start` `op` `end`, right-associative at PRECS.range
+ PRECS.range,
+ seq(field("start", $._expression),
+ field("op", $._range_operator),
+ field("end",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ infix_expression : ($) => prec.left( // `lhs` custom-operator `rhs`, left-associative
+ PRECS.infix_operations,
+ seq(field("lhs", $._expression),
+ field("op", $.custom_operator),
+ field("rhs",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ nil_coalescing_expression : ($) => prec.right( // `value`, the nil-coalescing operator, then `if_nil`; right-associative
+ PRECS.nil_coalescing,
+ seq(field("value", $._expression),
+ $._nil_coalescing_operator,
+ field(
+ "if_nil",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ check_expression : ($) => prec.left(PRECS.check, // `target` `is` `type`
+ seq(field("target", $._expression),
+ field("op", $._is_operator),
+ field("type", $._type))),
+ comparison_expression : ($) => prec.left(seq( // `lhs` `op` `rhs`; left-associative, no PRECS level is passed here
+ field("lhs", $._expression),
+ field("op", $._comparison_operator),
+ field("rhs",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ equality_expression : ($) => prec.left( // `lhs` `op` `rhs`, left-associative at PRECS.equality
+ PRECS.equality,
+ seq(field("lhs", $._expression),
+ field("op", $._equality_operator),
+ field("rhs",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ conjunction_expression : ($) => prec.left( // `lhs` `op` `rhs`, left-associative at PRECS.conjunction
+ PRECS.conjunction,
+ seq(field("lhs", $._expression),
+ field("op", $._conjunction_operator),
+ field(
+ "rhs",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ disjunction_expression : ($) => prec.left( // `lhs` `op` `rhs`, left-associative at PRECS.disjunction
+ PRECS.disjunction,
+ seq(field("lhs", $._expression),
+ field("op", $._disjunction_operator),
+ field(
+ "rhs",
+ $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ bitwise_operation : ($) => prec.left(seq( // `lhs` `op` `rhs`; left-associative, no PRECS level is passed here
+ field("lhs", $._expression),
+ field("op", $._bitwise_binary_operator),
+ field("rhs", $._expr_hack_at_ternary_binary_suffix))), // right operand goes through the ternary/binary suffix helper rule
+ custom_operator : ($) => choice(token(/[\/]+[*]+/), $._custom_operator), // a run of slashes followed by a run of asterisks, or the `_custom_operator` symbol (declared outside this chunk)
+ // Suffixes
+ navigation_suffix : ($) => seq($._dot, // `.` followed by a member name or an integer literal
+ field("suffix", choice($.simple_identifier,
+ $.integer_literal))),
+ call_suffix : ($) => prec( // what may follow a callee: arguments, trailing lambdas, or both
+ PRECS.call_suffix,
+ choice(
+ $.value_arguments, // arguments only
+ prec.dynamic(
+ -1,
+ $._fn_call_lambda_arguments), // Prefer to treat
+ // `foo() { }` as one
+ // call not two
+ seq($.value_arguments, $._fn_call_lambda_arguments))), // arguments followed by trailing lambdas
+ constructor_suffix : ($) => prec( // same three shapes as `call_suffix`, using the constructor argument rule
+ PRECS.call_suffix,
+ choice(
+ alias($._constructor_value_arguments, // surfaced in the tree as `value_arguments`
+ $.value_arguments),
+ prec.dynamic(
+ -1, $._fn_call_lambda_arguments), // As above
+ seq(alias($._constructor_value_arguments,
+ $.value_arguments),
+ $._fn_call_lambda_arguments))),
+ _constructor_value_arguments : ($) => seq( // parenthesized argument list only; there is no `[...]` form here
+ "(",
+ optional(sep1Opt($.value_argument, ",")), // the list may be empty
+ ")"),
+ _fn_call_lambda_arguments : ($) => sep1( // trailing lambdas, built with the `sep1` helper
+ $.lambda_literal,
+ seq(field("name", $.simple_identifier), ":")), // a `name:` label is passed as the separator between lambdas
+ type_arguments : ($) => prec.left(seq("<", sep1Opt($._type, ","), ">")), // `<` comma-separated types `>`
+ value_arguments : ($) => seq(choice( // argument list in either `(...)` or `[...]`
+ seq("(", optional(sep1Opt($.value_argument, ",")), ")"),
+ seq("[", optional(sep1Opt($.value_argument, ",")),
+ "]"))),
+ value_argument_label : ($) => prec.left(choice( // an argument label: an identifier, or `if` / `switch` used as one
+ $.simple_identifier,
+ // We don't rely on $._contextual_simple_identifier
+ // here because these don't usually fall into that
+ // category.
+ alias("if", $.simple_identifier), // keyword re-labelled as `simple_identifier`
+ alias("switch", $.simple_identifier))), // keyword re-labelled as `simple_identifier`
+ value_argument : ($) => prec.left(seq( // one call argument
+ optional($.type_modifiers),
+ choice(
+ repeat1(seq(field("reference_specifier", // either: one or more bare `label:` parts with no value
+ $.value_argument_label),
+ ":")),
+ seq(optional(seq( // or: an optional `name:` label followed by the `value` expression
+ field("name", $.value_argument_label), ":")),
+ field("value", $._expression))))),
+ try_expression : ($) => prec.right( // `try_operator` followed by the guarded expression in field `expr`
+ PRECS["try"], // bracket access to the `try` entry of PRECS
+ seq($.try_operator,
+ field("expr",
+ choice(
+ // Prefer direct calls, e.g. `try foo()`,
+ // over indirect like `try a ? b() : c`.
+ // This allows us to have left
+ // associativity for the direct calls,
+ // which is technically wrong but is the
+ // only way to resolve the ambiguity of `if
+ // foo { ... }` in the correct direction.
+ prec.right(-2, $._expression), // generic fallback, lowest static precedence of the four alternatives
+ prec.left(0, $._binary_expression),
+ prec.left(0, $.call_expression),
+ // Similarly special case the ternary
+ // expression, where `try` may come earlier
+ // than it is actually needed. When the
+ // parser just encounters some identifier
+ // after a `try`, it should prefer the
+ // `call_expression` (so this should be
+ // lower in priority than that), but when
+ // we encounter an ambiguous expression
+ // that might be either `try (foo() ? ...)`
+ // or `(try foo()) ? ...`, we should prefer
+ // the former. We accomplish that by giving
+ // it a _static precedence_ of -1 but a
+ // _dynamic precedence_ of 1.
+ prec.dynamic(
+ 1, prec.left(
+ -1, $.ternary_expression)))))),
+ await_expression : ($) => prec.right( // `await` followed by the awaited expression in field `expr`
+ PRECS.await,
+ seq($._await_operator,
+ field(
+ "expr",
+ choice(
+ // Prefer direct calls over indirect (same
+ // as with `try`).
+ prec.right(-2, $._expression), // generic fallback, lowest static precedence of the three alternatives
+ prec.left(0, $.call_expression),
+ // Special case ternary to `await` the
+ // whole thing (same as with `try`).
+ prec.dynamic(
+ 1, prec.left(
+ -1, $.ternary_expression)))))), // static precedence -1, dynamic precedence 1
+ _await_operator : ($) => alias("await", "await"), // keyword aliased to itself, the same pattern as `_three_dot_operator` (noted there as being for highlight queries)
+ ternary_expression : ($) => prec.right( // `condition` `?` `if_true` `:` `if_false`; right-associative
+ PRECS.ternary,
+ seq(field("condition", $._expression), $._quest,
+ field("if_true", $._expression), ":",
+ field("if_false",
+ $._expr_hack_at_ternary_binary_suffix))), // last operand goes through the ternary/binary suffix helper rule
+ _expr_hack_at_ternary_binary_suffix : ($) => prec.left( // trailing operand of ternary/binary rules
+ PRECS.ternary_binary_suffix,
+ choice(
+ $._expression, // either any expression
+ alias(
+ $.expr_hack_at_ternary_binary_call, // or the restricted call form, surfaced as `call_expression`
+ $.call_expression))),
+ expr_hack_at_ternary_binary_call : ($) => seq( // an expression followed by an arguments-only call suffix
+ $._expression,
+ alias(
+ $.expr_hack_at_ternary_binary_call_suffix, // surfaced in the tree as `call_suffix`
+ $.call_suffix)),
+ expr_hack_at_ternary_binary_call_suffix : ($) => prec(PRECS.call_suffix, // `value_arguments` only, with no trailing-lambda alternatives
+ $.value_arguments),
+ call_expression : ($) => prec(PRECS.call, prec.dynamic(DYNAMIC_PRECS.call, // static and dynamic precedence are both set
+ seq($._expression, // callee expression
+ $.call_suffix))), // followed by a `call_suffix`
+ macro_invocation : ($) => prec( // `#` name [type parameters] call_suffix
+ PRECS.call,
+ prec.dynamic(DYNAMIC_PRECS.call, // same precedence pair as `call_expression`
+ seq($._hash_symbol, $.simple_identifier,
+ optional($.type_parameters),
+ $.call_suffix))),
+ _primary_expression : ($) => choice( // choice over the primary expression rules
+ $.tuple_expression, $._basic_literal,
+ $.lambda_literal, $.special_literal,
+ $.playground_literal, $.array_literal,
+ $.dictionary_literal, $.self_expression,
+ $.super_expression, $.try_expression,
+ $.await_expression, $._referenceable_operator, // a bare operator is accepted as a primary expression
+ $.key_path_expression, $.key_path_string_expression,
+ prec.right(PRECS.fully_open_range,
+ alias($._three_dot_operator, // a lone three-dot operator, surfaced as `fully_open_range`
+ $.fully_open_range))),
+ tuple_expression : ($) => prec.right( // `(` elements `)`, each element an optional `name:` plus `value`
+ PRECS.tuple,
+ seq("(",
+ sep1Opt(seq(optional(seq( // the element list is not wrapped in `optional`
+ field("name", $.simple_identifier),
+ ":")),
+ field("value", $._expression)),
+ ","),
+ ")")),
+ array_literal : ($) => seq( // `[` optional comma-separated `element`s `]`
+ "[",
+ optional(sep1Opt(field("element", $._expression), ",")),
+ "]"),
+ dictionary_literal : ($) => // `[:]` or `[` key `:` value items `]`
+ seq("[",
+ choice(":", // a lone `:` is the no-items form
+ sep1Opt($._dictionary_literal_item, ",")),
+ optional(","), "]"), // an extra `,` is accepted before the closing bracket
+ _dictionary_literal_item : ($) => seq(field("key", $._expression), ":", // `key` `:` `value`
+ field("value", $._expression)),
+ special_literal : ($) => seq($._hash_symbol, // `#` followed by one of the listed literal names
+ choice("file", "fileID", "filePath", "line",
+ "column", "function", "dsohandle")),
+ playground_literal : ($) => seq( // `#` literal-kind `(` name `:` expr, ... `)`
+ $._hash_symbol,
+ choice("colorLiteral", "fileLiteral",
+ "imageLiteral"),
+ "(",
+ sep1Opt(seq($.simple_identifier, ":", $._expression), // every argument is labelled
+ ","),
+ ")"),
+ lambda_literal : ($) => prec.left(PRECS.lambda, // closure literal
+ seq(choice("{", "^{"), // opens with `{` or `^{`
+ optional($._lambda_type_declaration), // optional header ending in `in`
+ optional($.statements), "}")), // body may be empty
+ _lambda_type_declaration : ($) => seq(repeat($.attribute), // closure header: attributes, captures, signature, then `in`
+ prec(PRECS.expr,
+ optional(field("captures",
+ $.capture_list))),
+ optional(field(
+ "type", $.lambda_function_type)),
+ "in"), // the `in` keyword is the only mandatory part
+ capture_list : ($) => seq("[", sep1Opt($.capture_list_item, ","), "]"), // `[` capture items `]`
+ capture_list_item : ($) => choice( // one capture
+ field("name", $.self_expression), // either `self`
+ prec(PRECS.expr,
+ seq(optional($.ownership_modifier), // or: optional ownership modifier, a name,
+ field("name", $.simple_identifier),
+ optional( // and an optional `= value` initializer
+ seq($._equal_sign,
+ field("value", $._expression)))))),
+ lambda_function_type : ($) => prec( // closure signature: parameters, effects, optional return type
+ PRECS.expr,
+ seq(choice(
+ $.lambda_function_type_parameters, // parameters without parentheses
+ seq("(", // or parenthesized, where the list may be empty
+ optional(
+ $.lambda_function_type_parameters),
+ ")")),
+ optional($._async_keyword), optional($.throws),
+ optional(seq( // optional `->` return type
+ $._arrow_operator,
+ field(
+ "return_type",
+ $._possibly_implicitly_unwrapped_type))))),
+ lambda_function_type_parameters : ($) => sep1Opt($.lambda_parameter, ","), // comma-separated `lambda_parameter`s
+ lambda_parameter : ($) => seq(choice( // one closure parameter, in one of three shapes
+ $.self_expression, // `self`
+ prec(PRECS.expr, field("name", $.simple_identifier)), // a bare name
+ prec(
+ PRECS.expr,
+ seq(optional(field("external_name", // [external_name] name `:` [modifiers] type
+ $.simple_identifier)),
+ field("name", $.simple_identifier), ":",
+ optional($.parameter_modifiers),
+ field(
+ "type",
+ $._possibly_implicitly_unwrapped_type))))),
+ self_expression : ($) => "self", // the `self` keyword
+ super_expression : ($) => seq("super"), // the `super` keyword (single-element `seq`)
+ _else_options : ($) => choice($._block, $.if_statement), // what may follow `else`: a block or a chained `if`
+ if_statement : ($) => prec.right( // `if` conditions block [else ...]
+ PRECS["if"], // bracket access to the `if` entry of PRECS
+ seq("if",
+ sep1(field("condition", $._if_condition_sequence_item), // each item is tagged with field `condition`
+ ","),
+ $._block, optional(seq($["else"], $._else_options)))), // `$["else"]` refers to a rule named `else`
+ _if_condition_sequence_item : ($) => // one condition: a binding, an expression, or an availability check
+ choice($._if_let_binding, $._expression,
+ $.availability_condition),
+ _if_let_binding : ($) => seq($._direct_or_indirect_binding, // binding with an optional `= expression` and optional `where` clause
+ optional(seq($._equal_sign, $._expression)),
+ optional($.where_clause)),
+ guard_statement : ($) => prec.right( // `guard` conditions `else` block
+ PRECS["if"], // shares the `if` precedence entry
+ seq("guard",
+ sep1(field("condition",
+ $._if_condition_sequence_item),
+ ","),
+ $["else"], $._block)), // the `else` block is mandatory here
+ switch_statement : ($) => // `switch` expr `{` entries `}`
+ prec.right(PRECS["switch"],
+ seq("switch", field("expr", $._expression),
+ "{", repeat($.switch_entry), "}")), // zero or more entries
+ switch_entry : ($) => seq(optional($.modifiers), // one `case ...:` or default entry with its statements
+ choice(seq("case",
+ seq($.switch_pattern,
+ optional(seq($.where_keyword, // a `where` condition is accepted after the first pattern only
+ $._expression))),
+ repeat(seq(",", $.switch_pattern))), // further comma-separated patterns
+ $.default_keyword),
+ ":", $.statements, optional("fallthrough")), // the statements are required
+ switch_pattern : ($) => alias($._binding_pattern_with_expr, $.pattern), // surfaced in the tree as `pattern`
+ do_statement : ($) => prec.right( // `do` block followed by zero or more catch blocks
+ PRECS["do"], seq("do", $._block, repeat($.catch_block))),
+ catch_block : ($) => seq( // catch keyword, optional pattern, optional `where` clause, block
+ $.catch_keyword,
+ field("error", optional(alias($._binding_pattern_no_expr, // pattern goes in field `error`, surfaced as `pattern`
+ $.pattern))),
+ optional($.where_clause), $._block),
+ where_clause : ($) => prec.left(seq($.where_keyword, $._expression)), // the where keyword followed by a condition expression
+ key_path_expression : ($) => prec.right( // backslash, optional root type, then `.component` parts
+ PRECS.keypath,
+ seq("\\",
+ optional(choice($._simple_user_type, // root type may be omitted
+ $.array_type,
+ $.dictionary_type)),
+ repeat(seq(".", $._key_path_component)))), // zero or more components
+ key_path_string_expression : ($) => prec.left(seq($._hash_symbol, "keyPath", // `#keyPath(` expr `)`
+ "(", $._expression, ")")),
+ _key_path_component : ($) => // an identifier with optional postfixes, or postfixes alone
+ prec.left(choice(seq($.simple_identifier,
+ repeat($._key_path_postfixes)),
+ repeat1($._key_path_postfixes))), // without an identifier at least one postfix is required
+ _key_path_postfixes : ($) => choice( // `?`, a bang, `self`, or a bracketed argument list
+ "?", $.bang, "self",
+ seq("[", optional(sep1($.value_argument, ",")),
+ "]")),
+ try_operator : ($) => prec.right( // `try`, optionally followed by an immediate `!` / `?` or by `_fake_try_bang`
+ seq("try", choice(optional($._try_operator_type),
+ $._fake_try_bang))), // `_fake_try_bang` is declared outside this chunk
+ _try_operator_type : ($) => // `!` or `?` as immediate tokens
+ choice(token.immediate("!"), token.immediate("?")),
+ _assignment_and_operator : ($) => choice( // compound assignment operators plus plain `=`
+ "+=", "-=", "*=", "/=", "%=", $._equal_sign),
+ _equality_operator : ($) => choice("!=", "!==", $._eq_eq, "==="), // `==` comes from the `_eq_eq` rule
+ _comparison_operator : ($) => choice("<", ">", "<=", ">="), // the four ordering comparisons
+ _three_dot_operator : ($) => alias(
+ "...",
+ "..."), // Weird alias to satisfy highlight queries
+ _open_ended_range_operator : ($) => alias("..<", "..<"), // self-alias, same pattern as `_three_dot_operator`
+ _is_operator : ($) => "is", // the `is` keyword
+ _additive_operator : ($) => choice(alias($._plus_then_ws, "+"), // `_plus_then_ws` / `_minus_then_ws` are shown as "+" / "-"
+ alias($._minus_then_ws, "-"), "+", "-"), // plain "+" and "-" literals are accepted too
+ // The `/` operator conflicts with a regex literal (which itself appears to
+ // conflict with a comment, for some reason), so we must give it equivalent
+ // token precedence: hence the `token(prec(PRECS.regex, "/"))` wrapper,
+ // which is then aliased back to a plain "/" node.
+ _multiplicative_operator : ($) => choice( // `*`, `/` or `%`
+ "*", alias(token(prec(PRECS.regex, "/")), "/"),
+ "%"),
+ as_operator : ($) => choice($._as, $._as_quest, $._as_bang), // any of the three `as` operator rules
+ _prefix_unary_operator : ($) => prec.right(choice("++", "--", "-", "+", // operators accepted in prefix position
+ $.bang, "&", "~", $._dot, // a leading `_dot` counts as a prefix operator here
+ $.custom_operator)),
+ _bitwise_binary_operator : ($) => choice("&", "|", "^", "<<", ">>"), // binary bitwise and shift operators
+ _postfix_unary_operator : ($) => choice("++", "--", $.bang), // operators accepted in postfix position
+ directly_assignable_expression : ($) => $._expression, // a named wrapper around `_expression`
+
+ ////////////////////////////////
+ // Statements -
+ // https://docs.swift.org/swift-book/ReferenceManual/Statements.html
+ ////////////////////////////////
+ statements : ($) => prec.left( // one or more local statements separated by `_semi`
+ // Left precedence is required in switch statements
+ seq($._local_statement,
+ repeat(seq($._semi, $._local_statement)),
+ optional($._semi))), // a trailing separator is allowed
+ _local_statement : ($) => choice($._expression, $._local_declaration, // statement forms allowed inside `statements`
+ $._labeled_statement,
+ $.control_transfer_statement),
+ _top_level_statement : ($) => // top-level variant: global declarations and `_throw_statement` instead
+ choice($._expression, $._global_declaration,
+ $._labeled_statement, $._throw_statement),
+ _block : ($) => prec(PRECS.block, seq("{", optional($.statements), "}")), // `{` optional statements `}`
+ _labeled_statement : ($) => // an optional label followed by a loop or branching statement
+ seq(optional($.statement_label),
+ choice($.for_statement, $.while_statement,
+ $.repeat_while_statement, $.do_statement,
+ $.if_statement, $.guard_statement,
+ $.switch_statement)),
+ statement_label : ($) => token(/[a-zA-Z_][a-zA-Z_0-9]*:/), // ASCII identifier plus its `:`, matched as a single token
+ for_statement : ($) => // `for` [try] [await] item [type annotation] `in` collection [where] block
+ prec(PRECS.loop,
+ seq("for", optional($.try_operator),
+ optional($._await_operator),
+ field("item", alias($._binding_pattern_no_expr, // the loop pattern, surfaced as `pattern`
+ $.pattern)),
+ optional($.type_annotation), "in",
+ field("collection", $._for_statement_collection),
+ optional($.where_clause), $._block)),
+ _for_statement_collection : ($) => // the expression after `in` in a `for` statement
+ // If this expression has "await", this
+ // triggers some special-cased logic to prefer
+ // function calls. We prefer
+ // the opposite, though, since function calls may
+ // contain trailing code blocks, which are
+ // undesirable here.
+ //
+ // To fix that, we simply undo the special casing
+ // by defining our own `await_expression`.
+ choice($._expression, alias($.for_statement_await, // surfaced in the tree as `await_expression`
+ $.await_expression)),
+ for_statement_await : ($) => seq($._await_operator, $._expression), // plain `await` + expression, without the `await_expression` special cases
+
+ while_statement : ($) => prec(PRECS.loop, // `while` conditions `{` statements `}`
+ seq("while",
+ sep1(field("condition",
+ $._if_condition_sequence_item), // same condition items as `if`
+ ","),
+ "{", optional($.statements), "}")), // braces are spelled out here rather than using `_block`
+ repeat_while_statement : ($) => prec( // `repeat` `{` statements `}` `while` conditions
+ PRECS.loop,
+ seq("repeat", "{", optional($.statements), "}",
+ // Make sure we make it to the `while` before
+ // assuming this is a parameter pack.
+ repeat($._implicit_semi), "while", // any `_implicit_semi` before `while` is consumed
+ sep1(field("condition",
+ $._if_condition_sequence_item),
+ ","))),
+ control_transfer_statement : ($) => choice( // a throw, or a control keyword with an optional value
+ prec.right(PRECS.control_transfer,
+ $._throw_statement),
+ prec.right(
+ PRECS.control_transfer,
+ seq($._optionally_valueful_control_keyword,
+ field("result", // the value, when present, goes in field `result`
+ optional($._expression))))),
+ _throw_statement : ($) => seq($.throw_keyword, $._expression), // `throw` + expression
+ throw_keyword : ($) => "throw", // the `throw` keyword as a named node
+ _optionally_valueful_control_keyword : ($) => choice("return", "continue", // keywords that may be followed by a value
+ "break", "yield"),
+ assignment : ($) => prec.left( // `target` `operator` `result`
+ PRECS.assignment,
+ seq(field("target", $.directly_assignable_expression),
+ field("operator", $._assignment_and_operator), // `=` or a compound assignment operator
+ field("result", $._expression))),
+ value_parameter_pack : ($) => prec.left(PRECS.parameter_pack, // `each` + expression
+ seq("each", $._expression)),
+ value_pack_expansion : ($) => prec.left(PRECS.parameter_pack, // `repeat` + expression
+ seq("repeat", $._expression)),
+ availability_condition : ($) => // `#available(` args `)` or `#unavailable(` args `)`
+ seq($._hash_symbol,
+ choice("available", "unavailable"), "(",
+ sep1Opt($._availability_argument, ","), ")"),
+ _availability_argument : ($) => choice( // identifier plus dot-separated integer literals, or `*`
+ seq($.identifier, sep1($.integer_literal, ".")),
+ "*"),
+ ////////////////////////////////
+ // Declarations -
+ // https://docs.swift.org/swift-book/ReferenceManual/Declarations.html
+ ////////////////////////////////
+ _global_declaration : ($) => choice( // declaration kinds referenced by `_top_level_statement`
+ $.import_declaration, $.property_declaration,
+ $.typealias_declaration, $.function_declaration,
+ $.init_declaration, $.class_declaration,
+ $.protocol_declaration, $.operator_declaration,
+ $.precedence_group_declaration,
+ $.associatedtype_declaration, $.macro_declaration), // `macro_declaration` appears only in this list
+ _type_level_declaration : ($) => choice( // declaration kinds referenced by class and enum bodies
+ $.import_declaration, $.property_declaration,
+ $.typealias_declaration, $.function_declaration,
+ $.init_declaration, $.class_declaration,
+ $.protocol_declaration, $.deinit_declaration, // `deinit` and `subscript` appear only in this list
+ $.subscript_declaration, $.operator_declaration,
+ $.precedence_group_declaration,
+ $.associatedtype_declaration),
+ _local_declaration : ($) => choice(alias($._local_property_declaration, // each `_local_*` rule is surfaced under the regular node name
+ $.property_declaration),
+ alias($._local_typealias_declaration,
+ $.typealias_declaration),
+ alias($._local_function_declaration,
+ $.function_declaration),
+ alias($._local_class_declaration,
+ $.class_declaration)),
+ _local_property_declaration : ($) => seq( // optional local modifiers + modifierless property declaration
+ optional($._locally_permitted_modifiers),
+ $._modifierless_property_declaration),
+ _local_typealias_declaration : ($) => seq( // optional local modifiers + modifierless typealias declaration
+ optional($._locally_permitted_modifiers),
+ $._modifierless_typealias_declaration),
+ _local_function_declaration : ($) => seq( // optional local modifiers + modifierless function declaration
+ optional($._locally_permitted_modifiers),
+ $._modifierless_function_declaration),
+ _local_class_declaration : ($) => // optional local modifiers + modifierless class declaration
+ seq(optional($._locally_permitted_modifiers),
+ $._modifierless_class_declaration),
+ import_declaration : ($) => seq(optional($.modifiers), "import", // [modifiers] `import` [kind] identifier
+ optional($._import_kind), $.identifier),
+ _import_kind : ($) => choice("typealias", "struct", "class", "enum", // keyword naming the kind of symbol imported
+ "protocol", "let", "var", "func"),
+ protocol_property_declaration : ($) => prec.right(seq( // property requirement inside a protocol body
+ optional($.modifiers),
+ field("name",
+ alias($._binding_kind_and_pattern, // surfaced in the tree as `pattern`
+ $.pattern)),
+ optional($.type_annotation),
+ optional($.type_constraints),
+ $.protocol_property_requirements)), // the accessor block is mandatory
+ protocol_property_requirements : ($) => // `{` zero or more getter/setter specifiers `}`
+ seq("{",
+ repeat(choice($.getter_specifier,
+ $.setter_specifier)),
+ "}"),
+ property_declaration : ($) => seq(optional($.modifiers), // optional modifiers + modifierless form
+ $._modifierless_property_declaration),
+ _modifierless_property_declaration : ($) => prec.right(seq( // binding kind, then comma-separated single declarations
+ $._possibly_async_binding_pattern_kind,
+ sep1(
+ $._single_modifierless_property_declaration,
+ ","))),
+ _single_modifierless_property_declaration : ($) => prec.left(seq( // one name with optional type, constraints and value/accessors
+ field(
+ "name",
+ alias(
+ $._no_expr_pattern_already_bound, // surfaced in the tree as `pattern`
+ $.pattern)),
+ optional($.type_annotation),
+ optional($.type_constraints),
+ optional(choice( // at most one of the four tails below
+ $._expression_with_willset_didset,
+ $._expression_without_willset_didset,
+ $.willset_didset_block,
+ field(
+ "computed_value",
+ $.computed_property))))),
+ _expression_with_willset_didset : ($) => prec.dynamic( // `=` value followed by an observer block; dynamic precedence 1
+ 1, seq($._equal_sign,
+ field("value", $._expression),
+ $.willset_didset_block)),
+ _expression_without_willset_didset : ($) => // `=` value only
+ seq($._equal_sign,
+ field("value", $._expression)),
+ willset_didset_block : ($) => choice(seq("{", $.willset_clause, // the two clauses in either order; the second one is optional
+ optional($.didset_clause), "}"),
+ seq("{", $.didset_clause,
+ optional($.willset_clause), "}")),
+ willset_clause : ($) => seq(optional($.modifiers), "willSet", // `willSet` [`(` name `)`] block
+ optional(seq("(", $.simple_identifier, ")")),
+ $._block),
+ didset_clause : ($) => seq(optional($.modifiers), "didSet", // `didSet` [`(` name `)`] block
+ optional(seq("(", $.simple_identifier, ")")),
+ $._block),
+ typealias_declaration : ($) => seq(optional($.modifiers), // optional modifiers + modifierless form
+ $._modifierless_typealias_declaration),
+ _modifierless_typealias_declaration : ($) => // `typealias` name [type parameters] `=` value
+ seq("typealias",
+ field("name",
+ alias($.simple_identifier, // the identifier is re-labelled as `type_identifier`
+ $.type_identifier)),
+ optional($.type_parameters),
+ $._equal_sign,
+ field("value", $._type)),
+ function_declaration : ($) => // bodyless declaration followed by a mandatory `body`
+ prec.right(seq($._bodyless_function_declaration,
+ field("body", $.function_body))),
+ _modifierless_function_declaration : ($) => prec.right(seq( // same, without leading modifiers
+ $._modifierless_function_declaration_no_body,
+ field("body", $.function_body))),
+ _bodyless_function_declaration : ($) => seq( // [modifiers] [`class`] function signature, with no body
+ optional($.modifiers),
+ optional("class"), // XXX: This should be
+ // possible in
+ // non-last position,
+ // but that creates
+ // parsing ambiguity
+ $._modifierless_function_declaration_no_body),
+ _modifierless_function_declaration_no_body : ($) => prec.right(seq( // `func` name, generics, parameters, effects, return type, constraints
+ $._non_constructor_function_decl,
+ optional($.type_parameters),
+ $._function_value_parameters,
+ optional($._async_keyword),
+ optional($.throws),
+ optional(seq( // optional `->` return type
+ $._arrow_operator,
+ field(
+ "return_type",
+ $._possibly_implicitly_unwrapped_type))),
+ optional(
+ $.type_constraints))),
+ function_body : ($) => $._block, // a function body is a plain block
+ macro_declaration : ($) => // macro head, name, [type parameters], signature, [definition], [constraints]
+ seq($._macro_head, $.simple_identifier,
+ optional($.type_parameters), $._macro_signature,
+ optional(field("definition", $.macro_definition)),
+ optional($.type_constraints)),
+ _macro_head : ($) => seq(optional($.modifiers), "macro"), // [modifiers] `macro`
+ _macro_signature : ($) => seq( // parameters with an optional `->` result type
+ $._function_value_parameters,
+ optional(seq($._arrow_operator, $._unannotated_type))),
+ macro_definition : ($) => seq( // `=` followed by the `body`
+ $._equal_sign,
+ field("body", choice($._expression,
+ $.external_macro_definition))),
+
+ external_macro_definition : ($) => seq($._hash_symbol, "externalMacro", // `#externalMacro` followed by `value_arguments`
+ $.value_arguments),
+
+ class_declaration : ($) => seq(optional($.modifiers), // optional modifiers + modifierless form
+ $._modifierless_class_declaration),
+ _modifierless_class_declaration : ($) => prec.right(choice( // three shapes share this node; `declaration_kind` tells them apart
+ seq(field("declaration_kind", // 1) `class`, `struct` or `actor`
+ choice("class", "struct",
+ "actor")),
+ field("name",
+ alias($.simple_identifier, // the identifier is re-labelled as `type_identifier`
+ $.type_identifier)),
+ optional($.type_parameters),
+ optional(
+ seq(":",
+ $._inheritance_specifiers)),
+ optional($.type_constraints),
+ field("body", $.class_body)),
+ seq(field("declaration_kind", // 2) `extension`
+ "extension"),
+ field("name", $._unannotated_type), // the extended type may be any unannotated type
+ optional($.type_parameters),
+ optional(
+ seq(":",
+ $._inheritance_specifiers)),
+ optional($.type_constraints),
+ field("body", $.class_body)),
+ seq(optional("indirect"), // 3) `enum`, optionally preceded by `indirect`
+ field("declaration_kind", "enum"),
+ field("name",
+ alias($.simple_identifier,
+ $.type_identifier)),
+ optional($.type_parameters),
+ optional(
+ seq(":",
+ $._inheritance_specifiers)),
+ optional($.type_constraints),
+ field("body", $.enum_class_body)))), // enums use their own body rule
+ class_body : ($) => seq("{", optional($._class_member_declarations), "}"), // `{` optional members `}`
+ _inheritance_specifiers : ($) => prec.left( // specifiers separated by `,` or `&`
+ sep1($._annotated_inheritance_specifier,
+ choice(",", "&"))),
+ inheritance_specifier : ($) => prec.left( // one inherited type, stored in field `inherits_from`
+ field("inherits_from",
+ choice($.user_type, $.function_type,
+ $.suppressed_constraint))),
+ _annotated_inheritance_specifier : ($) => seq(repeat($.attribute), // zero or more attributes, then the specifier
+ $.inheritance_specifier),
+ type_parameters : ($) => seq("<", sep1Opt($.type_parameter, ","), // `<` parameters [type constraints] `>`
+ optional($.type_constraints), ">"),
+ type_parameter : ($) => seq(optional($.type_parameter_modifiers), // [modifiers] name-or-pack [`:` type]
+ $._type_parameter_possibly_packed,
+ optional(seq(":", $._type))),
+ _type_parameter_possibly_packed : ($) => choice(alias($.simple_identifier, // an identifier (as `type_identifier`) or a parameter pack
+ $.type_identifier),
+ $.type_parameter_pack),
+
+ type_constraints : ($) => prec.right( // the where keyword followed by comma-separated constraints
+ seq($.where_keyword, sep1Opt($.type_constraint, ","))),
+ type_constraint : ($) => // an inheritance constraint or an equality constraint
+ choice($.inheritance_constraint, $.equality_constraint),
+ inheritance_constraint : ($) => seq( // [attributes] constrained_type `:` inherits_from
+ repeat($.attribute),
+ field("constrained_type", $._constrained_type),
+ ":",
+ field("inherits_from",
+ $._possibly_implicitly_unwrapped_type)),
+ equality_constraint : ($) => // [attributes] constrained_type (`=` | `==`) must_equal
+ seq(repeat($.attribute),
+ field("constrained_type", $._constrained_type),
+ choice($._equal_sign, $._eq_eq), // both `=` and `==` are accepted
+ field("must_equal", $._type)),
+ _constrained_type : ($) => // an identifier, or a type with optional `.member` parts
+ choice($.identifier,
+ seq($._unannotated_type,
+ optional(seq(".", sep1($.simple_identifier,
+ "."))))),
+ _class_member_separator : ($) => choice($._semi, $.multiline_comment), // a multiline comment also counts as a member separator
+ _class_member_declarations : ($) => // type-level declarations joined by member separators
+ seq(sep1($._type_level_declaration,
+ $._class_member_separator),
+ optional($._class_member_separator)), // a trailing separator is allowed
+ _function_value_parameters : ($) => repeat1( // one or more parenthesized parameter lists
+ seq("(",
+ optional(sep1Opt( // each list may be empty
+ $._function_value_parameter, ",")),
+ ")")),
+ _function_value_parameter : ($) => seq(optional($.attribute), $.parameter, // [attribute] parameter [`=` default_value]
+ optional(seq($._equal_sign,
+ field("default_value",
+ $._expression)))),
+ parameter : ($) => // [external_name] name `:` [modifiers] type [three-dot operator]
+ seq(optional(field("external_name", $.simple_identifier)),
+ field("name", $.simple_identifier), ":",
+ optional($.parameter_modifiers),
+ field("type", $._possibly_implicitly_unwrapped_type),
+ optional($._three_dot_operator)), // an optional trailing three-dot operator
+ _non_constructor_function_decl : ($) => seq( // `func` followed by the function's name
+ "func",
+ field(
+ "name",
+ choice($.simple_identifier, // the name is an identifier or an operator
+ $._referenceable_operator))),
+ _referenceable_operator : ($) => choice(
+ $.custom_operator, $._comparison_operator,
+ $._additive_operator,
+ $._multiplicative_operator,
+ $._equality_operator, $._comparison_operator,
+ $._assignment_and_operator, "++", "--", $.bang,
+ "~", "|", "^", "<<", ">>", "&"),
+ // Hide the fact that certain symbols come from the custom scanner by
+ // aliasing them to their string variants.
+ // This keeps us from having to see them in the syntax tree (which would
+ // be noisy) but allows callers to refer to them as nodes by their text
+ // form, like with any operator.
+ // Each alias rule below wraps a `*_custom` symbol (declared outside this
+ // chunk) in an alias whose name is the literal text of the token.
+ _equal_sign : ($) => alias($._eq_custom, "="),
+ _eq_eq : ($) => alias($._eq_eq_custom, "=="),
+ _dot : ($) => alias($._dot_custom, "."),
+ _arrow_operator : ($) => alias($._arrow_operator_custom, "->"),
+ _conjunction_operator : ($) => alias($._conjunction_operator_custom, "&&"),
+ _disjunction_operator : ($) => alias($._disjunction_operator_custom, "||"),
+ _nil_coalescing_operator : ($) =>
+ alias($._nil_coalescing_operator_custom, "??"),
+ _as : ($) => alias($._as_custom, "as"),
+ _as_quest : ($) => alias($._as_quest_custom, "as?"),
+ _as_bang : ($) => alias($._as_bang_custom, "as!"),
+ _hash_symbol : ($) => alias($._hash_symbol_custom, "#"),
+ bang : ($) => choice($._bang_custom, "!"), // not an alias: either `_bang_custom` or a literal `!`
+ _async_keyword : ($) => alias($._async_keyword_custom, "async"),
+ _async_modifier : ($) => token("async"), // plain `async` token, separate from `_async_keyword`
+ throws : ($) => choice($._throws_keyword, $._rethrows_keyword), // either of the two throws keyword rules
+ enum_class_body : ($) => seq( // `{` any mix of enum entries and type-level declarations `}`
+ "{",
+ repeat(choice($.enum_entry, $._type_level_declaration)),
+ "}"),
+ enum_entry : ($) => seq(optional($.modifiers), optional("indirect"), "case", // [modifiers] [`indirect`] `case` name[suffix], ... [`;`]
+ sep1(seq(field("name", $.simple_identifier),
+ optional($._enum_entry_suffix)),
+ ","),
+ optional(";")),
+ _enum_entry_suffix : ($) => choice( // a parenthesized payload, or `=` raw value
+ field("data_contents", $.enum_type_parameters),
+ seq($._equal_sign,
+ field("raw_value", $._expression))),
+ enum_type_parameters : ($) => seq( // `(` optional payload elements `)`
+ "(",
+ optional(sep1(
+ seq(optional(seq(optional($.wildcard_pattern), // optional label: [wildcard] name `:`
+ $.simple_identifier, ":")),
+ $._type,
+ optional( // optional `= expression` after the type
+ seq($._equal_sign, $._expression))),
+ ",")),
+ ")"),
+ protocol_declaration : ($) => prec.right( // [modifiers] `protocol` name [generics] [`:` inheritance] [constraints] body
+ seq(optional($.modifiers),
+ field("declaration_kind", "protocol"),
+ field("name", alias($.simple_identifier, // the identifier is re-labelled as `type_identifier`
+ $.type_identifier)),
+ optional($.type_parameters),
+ optional(seq(":", $._inheritance_specifiers)),
+ optional($.type_constraints),
+ field("body", $.protocol_body))),
+ protocol_body : ($) => // `{` optional member declarations `}`
+ seq("{", optional($._protocol_member_declarations), "}"),
+ _protocol_member_declarations : ($) => // members separated by `_semi`, trailing separator allowed
+ seq(sep1($._protocol_member_declaration,
+ $._semi),
+ optional($._semi)),
+ _protocol_member_declaration : ($) => choice( // member kinds allowed in a protocol body
+ alias(seq($._bodyless_function_declaration, // function with an optional body,
+ optional(field(
+ "body", $.function_body))),
+ $.protocol_function_declaration), // surfaced as `protocol_function_declaration`
+ $.init_declaration, $.deinit_declaration,
+ $.protocol_property_declaration,
+ $.typealias_declaration,
+ $.associatedtype_declaration,
+ $.subscript_declaration),
+ init_declaration : ($) => prec.right( // [modifiers] [`class`] `init` [`?`|bang] [generics] params [effects] [constraints] [body]
+ seq(optional($.modifiers), optional("class"),
+ field("name", "init"),
+ optional(choice($._quest, $.bang)),
+ optional($.type_parameters),
+ $._function_value_parameters,
+ optional($._async_keyword), optional($.throws),
+ optional($.type_constraints),
+ optional(field("body", $.function_body)))), // the body is optional
+ deinit_declaration : ($) => prec.right(seq(optional($.modifiers), "deinit", // [modifiers] `deinit` body
+ field("body", $.function_body))),
+ subscript_declaration : ($) => prec.right(seq( // [modifiers] `subscript` [generics] params [`->` type] [constraints] accessors
+ optional($.modifiers), "subscript",
+ optional($.type_parameters),
+ $._function_value_parameters,
+ optional(seq( // optional `->` return type
+ $._arrow_operator,
+ field(
+ "return_type",
+ $._possibly_implicitly_unwrapped_type))),
+ optional($.type_constraints),
+ $.computed_property)), // the accessor block is mandatory
+ computed_property : ($) => seq("{", // `{` statements, or a list of accessors `}`
+ choice(optional($.statements),
+ repeat(choice($.computed_getter,
+ $.computed_setter,
+ $.computed_modify))),
+ "}"),
+ computed_getter : ($) => seq(repeat($.attribute), $.getter_specifier, // [attributes] getter specifier [block]
+ optional($._block)),
+ computed_modify : ($) => seq(repeat($.attribute), $.modify_specifier, // [attributes] modify specifier [block]
+ optional($._block)),
+ computed_setter : ($) => seq(repeat($.attribute), $.setter_specifier, // [attributes] setter specifier [`(` name `)`] [block]
+ optional(seq("(", $.simple_identifier, ")")),
+ optional($._block)),
+ getter_specifier : ($) => seq(optional($.mutation_modifier), "get", // [mutation modifier] `get` [effects]
+ optional($._getter_effects)),
+ setter_specifier : ($) => seq(optional($.mutation_modifier), "set"), // [mutation modifier] `set`
+ modify_specifier : ($) => seq(optional($.mutation_modifier), "_modify"), // [mutation modifier] `_modify`
+ _getter_effects : ($) => repeat1(choice($._async_keyword, $.throws)), // one or more of async / throws, in any order
+ // `prefix|infix|postfix operator <op>` with an optional `: identifier`
+ // suffix and an optional (deprecated) braced body.
+ operator_declaration : ($) => seq(
+ choice("prefix", "infix", "postfix"), "operator",
+ $._referenceable_operator,
+ optional(seq(":", $.simple_identifier)),
+ optional($.deprecated_operator_declaration_body)),
+ // The Swift compiler no longer accepts these, but some very old code still
+ // uses it.
+ deprecated_operator_declaration_body : ($) => seq(
+ "{",
+ repeat(choice($.simple_identifier,
+ $._basic_literal)),
+ "}"),
+ // `precedencegroup <name> { ... }`; the attribute list may be empty.
+ precedence_group_declaration : ($) => seq(
+ "precedencegroup", $.simple_identifier,
+ "{",
+ optional($.precedence_group_attributes),
+ "}"),
+ precedence_group_attributes : ($) => repeat1($.precedence_group_attribute),
+ // A single `name: value` entry whose value is an identifier or a boolean
+ // literal.
+ precedence_group_attribute : ($) => seq($.simple_identifier, ":",
+ choice($.simple_identifier,
+ $.boolean_literal)),
+ // `associatedtype Name`, with an optional `: Type` stored in the
+ // `must_inherit` field, optional type constraints, and an optional
+ // `= Type` stored in the `default_value` field.
+ associatedtype_declaration : ($) =>
+ seq(optional($.modifiers), "associatedtype",
+ field("name", alias($.simple_identifier,
+ $.type_identifier)),
+ optional(seq(":", field("must_inherit",
+ $._type))),
+ optional($.type_constraints),
+ optional(seq(
+ $._equal_sign,
+ field("default_value", $._type)))),
+ ////////////////////////////////
+ // Attributes -
+ // https://docs.swift.org/swift-book/ReferenceManual/Attributes.html
+ ////////////////////////////////
+ // `@` followed by a user_type and an optional parenthesized argument list.
+ // sep1Opt lets the comma-separated list end with a trailing comma.
+ attribute : ($) => seq(
+ "@", $.user_type,
+ // attribute arguments are a mess of special cases, maybe this
+ // is good enough?
+ optional(seq("(", sep1Opt($._attribute_argument, ","), ")"))),
+ // The argument shapes accepted inside an attribute's parentheses.
+ _attribute_argument : ($) => choice(
+ // labeled function parameters, used in custom
+ // property wrappers
+ seq($.simple_identifier, ":", $._expression),
+ // Unlabeled function parameters, simple
+ // identifiers, or `*`
+ $._expression,
+ // References to param names (used in
+ // `@objc(foo:bar:)`)
+ repeat1(seq($.simple_identifier, ":")),
+ // Version restrictions (iOS 3.4.5, Swift 5.0.0)
+ seq(repeat1($.simple_identifier),
+ sep1($.integer_literal, "."))),
+ ////////////////////////////////
+ // Patterns -
+ // https://docs.swift.org/swift-book/ReferenceManual/Patterns.html
+ ////////////////////////////////
+ // Pattern forms shared by every pattern variant below.
+ _universally_allowed_pattern : ($) => choice(
+ $.wildcard_pattern, $._tuple_pattern,
+ $._type_casting_pattern, $._case_pattern),
+ // A simple identifier tagged with the `bound_identifier` field.
+ _bound_identifier : ($) => field("bound_identifier", $.simple_identifier),
+
+ // The next four rules all have the form "<alternatives> followed by an
+ // optional `_quest`". They differ only in which alternatives are allowed
+ // next to the universal ones: a nested binding pattern, a bound identifier,
+ // and/or an arbitrary expression.
+ _binding_pattern_no_expr : ($) => seq(choice($._universally_allowed_pattern,
+ $._binding_pattern,
+ $._bound_identifier),
+ optional($._quest)),
+ _no_expr_pattern_already_bound : ($) => seq(
+ choice($._universally_allowed_pattern,
+ $._bound_identifier),
+ optional($._quest)),
+ _binding_pattern_with_expr : ($) => seq(
+ choice($._universally_allowed_pattern,
+ $._binding_pattern, $._expression),
+ optional($._quest)),
+ _non_binding_pattern_with_expr : ($) => seq(
+ choice($._universally_allowed_pattern,
+ $._expression),
+ optional($._quest)),
+ // Either a binding kind followed by a pattern, or `case` followed by a
+ // pattern; both may be followed by a type annotation.
+ _direct_or_indirect_binding : ($) =>
+ seq(choice($._binding_kind_and_pattern,
+ seq("case",
+ $._binding_pattern_no_expr)),
+ optional($.type_annotation)),
+ // `var` or `let`, stored in the `mutability` field.
+ value_binding_pattern : ($) => field("mutability", choice("var", "let")),
+ _possibly_async_binding_pattern_kind : ($) =>
+ seq(optional($._async_modifier),
+ $.value_binding_pattern),
+ _binding_kind_and_pattern : ($) =>
+ seq($._possibly_async_binding_pattern_kind,
+ $._no_expr_pattern_already_bound),
+ wildcard_pattern : ($) => "_",
+ // A tuple element: an optional `label:` followed by a pattern. The inner
+ // rule is exposed under the node name `pattern`.
+ _tuple_pattern_item : ($) => choice(
+ seq($.simple_identifier,
+ seq(":", alias($._binding_pattern_with_expr,
+ $.pattern))),
+ alias($._binding_pattern_with_expr, $.pattern)),
+ // Parenthesized, comma-separated items; a trailing comma is accepted.
+ _tuple_pattern : ($) => seq("(", sep1Opt($._tuple_pattern_item, ","), ")"),
+ // `[case] [Type].member[(tuple pattern)]`.
+ _case_pattern : ($) => seq(
+ optional("case"),
+ optional($.user_type), // XXX this should just be _type
+ // but that creates ambiguity
+ $._dot, $.simple_identifier, optional($._tuple_pattern)),
+ // `is Type`, or `<pattern> as Type`.
+ _type_casting_pattern : ($) => choice(
+ seq("is", $._type),
+ seq(alias($._binding_pattern_no_expr, $.pattern),
+ $._as, $._type)),
+ // `[case] var|let <pattern>`.
+ _binding_pattern : ($) =>
+ seq(seq(optional("case"), $.value_binding_pattern),
+ $._no_expr_pattern_already_bound),
+
+ // ==========
+ // Modifiers
+ // ==========
+ // One or more modifiers. Each repetition is either a single non-local-scope
+ // modifier or a run of locally permitted modifiers/attributes.
+ modifiers : ($) =>
+ repeat1(prec.left(choice($._non_local_scope_modifier,
+ $._locally_permitted_modifiers))),
+ _locally_permitted_modifiers : ($) => repeat1(
+ choice($.attribute,
+ $._locally_permitted_modifier)),
+ parameter_modifiers : ($) => repeat1($.parameter_modifier),
+ _modifier : ($) => choice($._non_local_scope_modifier,
+ $._locally_permitted_modifier),
+ _non_local_scope_modifier : ($) => choice(
+ $.member_modifier, $.visibility_modifier,
+ $.function_modifier, $.mutation_modifier,
+ $.property_modifier, $.parameter_modifier),
+ _locally_permitted_modifier : ($) => choice($.ownership_modifier,
+ $.inheritance_modifier,
+ $.property_behavior_modifier),
+ property_behavior_modifier : ($) => "lazy",
+ type_modifiers : ($) => repeat1($.attribute),
+ member_modifier : ($) => choice("override", "convenience", "required",
+ "nonisolated"),
+ // An access level keyword, optionally followed by `(set)`.
+ visibility_modifier : ($) => seq(choice("public", "private", "internal",
+ "fileprivate", "open", "package"),
+ optional(seq("(", "set", ")"))),
+ type_parameter_modifiers : ($) => repeat1($.attribute),
+ function_modifier : ($) => choice("infix", "postfix", "prefix"),
+ mutation_modifier : ($) => choice("mutating", "nonmutating"),
+ property_modifier : ($) => choice("static", "dynamic", "optional", "class",
+ "distributed"),
+ inheritance_modifier : ($) => choice("final"),
+ // Note: `@escaping` and `@autoclosure` are matched here as whole literal
+ // tokens rather than through the `attribute` rule.
+ parameter_modifier : ($) => choice("inout", "@escaping", "@autoclosure",
+ $._parameter_ownership_modifier),
+ // `unowned(safe)` and `unowned(unsafe)` are each a single literal token.
+ ownership_modifier : ($) => choice("weak", "unowned", "unowned(safe)",
+ "unowned(unsafe)"),
+ _parameter_ownership_modifier : ($) => choice("borrowing", "consuming"),
+ // A target keyword followed by `:`.
+ use_site_target : ($) => seq(choice("property", "get", "set", "receiver",
+ "param", "setparam", "delegate"),
+ ":"),
+ // Conditional-compilation directives. The `_directive_*` tokens are aliased
+ // to their literal spelling (`#if`, `#elseif`, `#else`, `#endif`); `#if` and
+ // `#elseif` are followed by a compilation condition. The rule uses the
+ // `PRECS.comment` precedence.
+ directive : ($) =>
+ prec.right(PRECS.comment,
+ choice(seq(alias($._directive_if, "#if"),
+ $._compilation_condition),
+ seq(alias($._directive_elseif, "#elseif"),
+ $._compilation_condition),
+ seq(alias($._directive_else, "#else")),
+ seq(alias($._directive_endif, "#endif")))),
+ // The condition of `#if`/`#elseif`: platform tests (`os`, `arch`,
+ // `targetEnvironment`), version tests (`swift`, `compiler`: a comparison
+ // operator followed by a dotted integer version), `canImport` with a dotted
+ // identifier path, a bare boolean literal or identifier, and recursive
+ // combinations via parentheses, `!`, and the conjunction/disjunction
+ // operators.
+ _compilation_condition : ($) => prec.right(choice(
+ seq("os", "(", $.simple_identifier, ")"),
+ seq("arch", "(", $.simple_identifier, ")"),
+ seq("swift", "(", $._comparison_operator,
+ sep1($.integer_literal, "."), ")"),
+ seq("compiler", "(", $._comparison_operator,
+ sep1($.integer_literal, "."), ")"),
+ seq("canImport", "(",
+ sep1($.simple_identifier, "."), ")"),
+ seq("targetEnvironment", "(",
+ $.simple_identifier, ")"),
+ $.boolean_literal, $.simple_identifier,
+ seq("(", $._compilation_condition, ")"),
+ seq("!", $._compilation_condition),
+ seq($._compilation_condition,
+ $._conjunction_operator,
+ $._compilation_condition),
+ seq($._compilation_condition,
+ $._disjunction_operator,
+ $._compilation_condition))),
+ // `#error…`, `#warning…` and `#sourceLocation…`: the hash symbol followed by
+ // the keyword and everything up to (not including) the end of the line.
+ diagnostic : ($) => prec(
+ PRECS.comment,
+ seq($._hash_symbol,
+ choice(
+ // Using regexes here, rather than actually
+ // validating the string literal, because complex
+ // string literals cannot be used inside `token()`
+ // and we need that to ensure we get the right
+ // precedence.
+ seq(/error([^\r\n]*)/), seq(/warning([^\r\n]*)/),
+ seq(/sourceLocation([^\r\n]*)/)))),
+ // Dumping ground for any nodes that used to exist in the grammar, but have
+ // since been removed for whatever reason.
+ // Neovim applies updates non-atomically to the parser and the queries.
+ // Meanwhile, `tree-sitter` rejects any query that contains any unrecognized
+ // nodes. Putting those two facts together, we see that we must never remove
+ // nodes that once existed.
+ // Here the placeholder literals "unused1" and "unused2" are aliased to the
+ // node names `try?` and `try!`.
+ unused_for_backward_compatibility : ($) => choice(alias("unused1", "try?"),
+ alias("unused2", "try!")),
+ },
+});
+function sep1(rule, separator) {
+ return seq(rule, repeat(seq(separator, rule)));
+}
+function sep1Opt(rule, separator) {
+ return seq(rule, repeat(seq(separator, rule)), optional(separator));
+}
+
+function tree_sitter_version_supports_emoji() {
+ try {
+ return (TREE_SITTER_CLI_VERSION_MAJOR > 0 ||
+ TREE_SITTER_CLI_VERSION_MINOR > 20 ||
+ TREE_SITTER_CLI_VERSION_PATCH >= 5);
+ } catch (err) {
+ if (err instanceof ReferenceError) {
+ return false;
+ } else {
+ throw err;
+ }
+ }
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/highlights.scm b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/highlights.scm
new file mode 100644
index 0000000000000..82ad68d4ed1c8
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/highlights.scm
@@ -0,0 +1,336 @@
+; Punctuation
+; Separator tokens.
+[
+ "."
+ ";"
+ ":"
+ ","
+] @punctuation.delimiter
+
+; Opening and closing brackets of every kind.
+[
+ "("
+ ")"
+ "["
+ "]"
+ "{"
+ "}"
+] @punctuation.bracket
+
+; Identifiers
+; Every type_identifier node is a type.
+(type_identifier) @type
+
+; `self` and `super` expressions are built-in variables.
+[
+ (self_expression)
+ (super_expression)
+] @variable.builtin
+
+; Declarations
+[
+ "func"
+ "deinit"
+] @keyword.function
+
+; All modifier nodes share one capture.
+[
+ (visibility_modifier)
+ (member_modifier)
+ (function_modifier)
+ (property_modifier)
+ (parameter_modifier)
+ (inheritance_modifier)
+ (mutation_modifier)
+] @keyword.modifier
+
+; Generic capture for every simple_identifier. The more specific patterns in
+; this file capture some of the same nodes under other names; which capture
+; wins depends on the precedence rule of the highlighter consuming the query.
+(simple_identifier) @variable
+
+; Function and method names.
+(function_declaration
+ (simple_identifier) @function.method)
+
+(protocol_function_declaration
+ name: (simple_identifier) @function.method)
+
+(init_declaration
+ "init" @constructor)
+
+; Both the external (argument label) and the internal parameter name.
+(parameter
+ external_name: (simple_identifier) @variable.parameter)
+
+(parameter
+ name: (simple_identifier) @variable.parameter)
+
+; Generic type parameters and the identifiers constrained in inheritance and
+; equality constraints.
+(type_parameter
+ (type_identifier) @variable.parameter)
+
+(inheritance_constraint
+ (identifier
+ (simple_identifier) @variable.parameter))
+
+(equality_constraint
+ (identifier
+ (simple_identifier) @variable.parameter))
+
+; General keywords: literal tokens plus the named nodes that wrap a keyword.
+[
+ "protocol"
+ "extension"
+ "indirect"
+ "nonisolated"
+ "override"
+ "convenience"
+ "required"
+ "some"
+ "any"
+ "weak"
+ "unowned"
+ "didSet"
+ "willSet"
+ "subscript"
+ "let"
+ "var"
+ (throws)
+ (where_keyword)
+ (getter_specifier)
+ (setter_specifier)
+ (modify_specifier)
+ (else)
+ (as_operator)
+] @keyword
+
+; Keywords that introduce a type declaration.
+[
+ "enum"
+ "struct"
+ "class"
+ "typealias"
+] @keyword.type
+
+[
+ "async"
+ "await"
+] @keyword.coroutine
+
+; The shebang line shares the capture used for compiler directives.
+(shebang_line) @keyword.directive
+
+; Members: properties declared directly in a class body or a protocol, the
+; identifier after the dot in a navigation expression, and argument labels at
+; call sites.
+(class_body
+ (property_declaration
+ (pattern
+ (simple_identifier) @variable.member)))
+
+(protocol_property_declaration
+ (pattern
+ (simple_identifier) @variable.member))
+
+(navigation_expression
+ (navigation_suffix
+ (simple_identifier) @variable.member))
+
+(value_argument
+ name: (value_argument_label
+ (simple_identifier) @variable.member))
+
+(import_declaration
+ "import" @keyword.import)
+
+; `case` inside an enum entry.
+(enum_entry
+ "case" @keyword)
+
+; Attributes inside a modifier list: both the `@` and the attribute name.
+(modifiers
+ (attribute
+ "@" @attribute
+ (user_type
+ (type_identifier) @attribute)))
+
+; Function calls
+(call_expression
+ (simple_identifier) @function.call) ; foo()
+
+(call_expression
+ ; foo.bar.baz(): highlight the baz()
+ (navigation_expression
+ (navigation_suffix
+ (simple_identifier) @function.call)))
+
+(call_expression
+ (prefix_expression
+ (simple_identifier) @function.call)) ; .foo()
+
+; The predicate restricts this pattern to identifiers that start with an
+; uppercase ASCII letter.
+((navigation_expression
+ (simple_identifier) @type) ; SomeType.method(): highlight SomeType as a type
+ (#match? @type "^[A-Z]"))
+
+; Conditional-compilation directive nodes.
+(directive) @keyword.directive
+
+; See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Keywords-and-Punctuation
+; Each of these nodes is highlighted as a whole, as a macro.
+[
+ (diagnostic)
+ (availability_condition)
+ (playground_literal)
+ (key_path_string_expression)
+ (selector_expression)
+ (external_macro_definition)
+] @function.macro
+
+(special_literal) @constant.macro
+
+; Statements
+; `for` and `in` are captured only inside a for_statement; a closure's `in`
+; is captured separately under "Lambda literals".
+(for_statement
+ "for" @keyword.repeat)
+
+(for_statement
+ "in" @keyword.repeat)
+
+[
+ "while"
+ "repeat"
+ "continue"
+ "break"
+] @keyword.repeat
+
+; Conditional keywords are matched within their own statement node.
+(guard_statement
+ "guard" @keyword.conditional)
+
+(if_statement
+ "if" @keyword.conditional)
+
+(switch_statement
+ "switch" @keyword.conditional)
+
+; Keywords that appear inside a switch entry.
+(switch_entry
+ "case" @keyword)
+
+(switch_entry
+ "fallthrough" @keyword)
+
+(switch_entry
+ (default_keyword) @keyword)
+
+"return" @keyword.return
+
+; The `?` and `:` of a ternary expression.
+(ternary_expression
+ [
+ "?"
+ ":"
+ ] @keyword.conditional.ternary)
+
+; Error-handling keywords.
+[
+ (try_operator)
+ "do"
+ (throw_keyword)
+ (catch_keyword)
+] @keyword.exception
+
+(statement_label) @label
+
+; Comments
+; Both comment node kinds get two captures: @comment and @spell.
+[
+ (comment)
+ (multiline_comment)
+] @comment @spell
+
+; Documentation comments: `///` followed by a non-slash character...
+((comment) @comment.documentation
+ (#match? @comment.documentation "^///[^/]"))
+
+; ...or a comment that consists of exactly `///`.
+((comment) @comment.documentation
+ (#match? @comment.documentation "^///$"))
+
+; Block documentation comments: text starting with `/**` followed by a
+; non-`*` character and ending with `*/`.
+((multiline_comment) @comment.documentation
+ (#match? @comment.documentation "^/[*][*][^*].*[*]/$"))
+
+; String literals
+; Text segments of single-line, multi-line and raw strings.
+(line_str_text) @string
+
+(str_escaped_char) @string.escape
+
+(multi_line_str_text) @string
+
+(raw_str_part) @string
+
+(raw_str_end_part) @string
+
+; Interpolation delimiters: the `\(` opener (or the raw-string interpolation
+; start node) and the closing `)`.
+(line_string_literal
+ [
+ "\\("
+ ")"
+ ] @punctuation.special)
+
+(multi_line_string_literal
+ [
+ "\\("
+ ")"
+ ] @punctuation.special)
+
+(raw_str_interpolation
+ [
+ (raw_str_interpolation_start)
+ ")"
+ ] @punctuation.special)
+
+; The quote tokens themselves: `"` and `"""`.
+[
+ "\""
+ "\"\"\""
+] @string
+
+; Lambda literals
+; The `in` of a closure.
+(lambda_literal
+ "in" @keyword.operator)
+
+; Basic literals
+; All integer literal forms.
+[
+ (integer_literal)
+ (hex_literal)
+ (oct_literal)
+ (bin_literal)
+] @number
+
+(real_literal) @number.float
+
+(boolean_literal) @boolean
+
+"nil" @constant.builtin
+
+; The wildcard pattern node.
+(wildcard_pattern) @character.special
+
+; Regex literals
+(regex_literal) @string.regexp
+
+; Operators
+; Custom operator nodes.
+(custom_operator) @operator
+
+; Built-in operator tokens, plus the `bang` node.
+[
+ "+"
+ "-"
+ "*"
+ "/"
+ "%"
+ "="
+ "+="
+ "-="
+ "*="
+ "/="
+ "<"
+ ">"
+ "<<"
+ ">>"
+ "<="
+ ">="
+ "++"
+ "--"
+ "^"
+ "&"
+ "&&"
+ "|"
+ "||"
+ "~"
+ "%="
+ "!="
+ "!=="
+ "=="
+ "==="
+ "?"
+ "??"
+ "->"
+ "..<"
+ "..."
+ (bang)
+] @operator
+
+; Inside type_arguments, `<` and `>` are brackets. The same tokens are also
+; listed as operators above.
+(type_arguments
+ [
+ "<"
+ ">"
+ ] @punctuation.bracket)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/scanner.c b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/scanner.c
new file mode 100644
index 0000000000000..dcb6bf802f45c
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/scanner.c
@@ -0,0 +1,865 @@
+#include "tree_sitter/parser.h"
+#include <string.h>
+#include <wctype.h>
+
+// Number of enumerators in `enum TokenType` below (FAKE_TRY_BANG has the
+// value 32). Keep the two in sync when adding tokens.
+#define TOKEN_COUNT 33
+
+// Tokens produced by this external scanner. The values are used directly as
+// indices into the `valid_symbols` array handed to the scan functions.
+// NOTE(review): presumably this order must match the `externals` list in
+// grammar.js -- confirm before reordering.
+enum TokenType {
+ BLOCK_COMMENT,
+ RAW_STR_PART,
+ RAW_STR_CONTINUING_INDICATOR,
+ RAW_STR_END_PART,
+ IMPLICIT_SEMI,
+ EXPLICIT_SEMI,
+ ARROW_OPERATOR,
+ DOT_OPERATOR,
+ CONJUNCTION_OPERATOR,
+ DISJUNCTION_OPERATOR,
+ NIL_COALESCING_OPERATOR,
+ EQUAL_SIGN,
+ EQ_EQ,
+ PLUS_THEN_WS,
+ MINUS_THEN_WS,
+ BANG,
+ THROWS_KEYWORD,
+ RETHROWS_KEYWORD,
+ DEFAULT_KEYWORD,
+ WHERE_KEYWORD,
+ ELSE_KEYWORD,
+ CATCH_KEYWORD,
+ AS_KEYWORD,
+ AS_QUEST,
+ AS_BANG,
+ ASYNC_KEYWORD,
+ CUSTOM_OPERATOR,
+ HASH_SYMBOL,
+ DIRECTIVE_IF,
+ DIRECTIVE_ELSEIF,
+ DIRECTIVE_ELSE,
+ DIRECTIVE_ENDIF,
+ FAKE_TRY_BANG
+};
+
+// Number of built-in operators/keywords recognized by eat_operators().
+#define OPERATOR_COUNT 20
+
+// Spelling of each built-in operator/keyword. This table is parallel to
+// OP_ILLEGAL_TERMINATORS, OP_SYMBOLS and OP_SYMBOL_SUPPRESSOR: index i in each
+// of them describes the same operator.
+const char *OPERATORS[OPERATOR_COUNT] = {
+ "->", ".", "&&", "||", "??", "=", "==",
+ "+", "-", "!", "throws", "rethrows", "default", "where",
+ "else", "catch", "as", "as?", "as!", "async"};
+
+// Classes of characters that may NOT directly follow an operator for it to
+// count as a match (see the lookahead check in eat_operators()):
+// - ALPHANUMERIC: rejected when followed by an alphanumeric character.
+// - OPERATOR_SYMBOLS: rejected when followed by an operator character.
+// - OPERATOR_OR_DOT: rejected when followed by an operator character or `.`.
+// - NON_WHITESPACE: rejected when followed by anything but whitespace.
+enum IllegalTerminatorGroup {
+ ALPHANUMERIC,
+ OPERATOR_SYMBOLS,
+ OPERATOR_OR_DOT,
+ NON_WHITESPACE
+};
+
+// For each entry of OPERATORS (same index), the group of characters that must
+// not immediately follow it.
+const enum IllegalTerminatorGroup OP_ILLEGAL_TERMINATORS[OPERATOR_COUNT] = {
+ OPERATOR_SYMBOLS, // ->
+ OPERATOR_OR_DOT, // .
+ OPERATOR_SYMBOLS, // &&
+ OPERATOR_SYMBOLS, // ||
+ OPERATOR_SYMBOLS, // ??
+ OPERATOR_SYMBOLS, // =
+ OPERATOR_SYMBOLS, // ==
+ NON_WHITESPACE, // +
+ NON_WHITESPACE, // -
+ OPERATOR_SYMBOLS, // !
+ ALPHANUMERIC, // throws
+ ALPHANUMERIC, // rethrows
+ ALPHANUMERIC, // default
+ ALPHANUMERIC, // where
+ ALPHANUMERIC, // else
+ ALPHANUMERIC, // catch
+ ALPHANUMERIC, // as
+ OPERATOR_SYMBOLS, // as?
+ OPERATOR_SYMBOLS, // as!
+ ALPHANUMERIC // async
+};
+
+// For each entry of OPERATORS (same index), the token that is emitted when
+// that operator is matched.
+const enum TokenType OP_SYMBOLS[OPERATOR_COUNT] = {ARROW_OPERATOR,
+ DOT_OPERATOR,
+ CONJUNCTION_OPERATOR,
+ DISJUNCTION_OPERATOR,
+ NIL_COALESCING_OPERATOR,
+ EQUAL_SIGN,
+ EQ_EQ,
+ PLUS_THEN_WS,
+ MINUS_THEN_WS,
+ BANG,
+ THROWS_KEYWORD,
+ RETHROWS_KEYWORD,
+ DEFAULT_KEYWORD,
+ WHERE_KEYWORD,
+ ELSE_KEYWORD,
+ CATCH_KEYWORD,
+ AS_KEYWORD,
+ AS_QUEST,
+ AS_BANG,
+ ASYNC_KEYWORD};
+
+// For each entry of OPERATORS (same index), a bitmask over TokenType values:
+// bit N set means "do not emit this operator when token N is currently
+// valid". Only BANG has a suppressor (FAKE_TRY_BANG).
+//
+// The mask is built from a 64-bit one: FAKE_TRY_BANG is 32, so shifting an
+// `unsigned long` would be undefined on targets where that type is 32 bits
+// wide.
+const uint64_t OP_SYMBOL_SUPPRESSOR[OPERATOR_COUNT] = {
+    0,                            // ARROW_OPERATOR,
+    0,                            // DOT_OPERATOR,
+    0,                            // CONJUNCTION_OPERATOR,
+    0,                            // DISJUNCTION_OPERATOR,
+    0,                            // NIL_COALESCING_OPERATOR,
+    0,                            // EQUAL_SIGN,
+    0,                            // EQ_EQ,
+    0,                            // PLUS_THEN_WS,
+    0,                            // MINUS_THEN_WS,
+    (uint64_t)1 << FAKE_TRY_BANG, // BANG,
+    0,                            // THROWS_KEYWORD,
+    0,                            // RETHROWS_KEYWORD,
+    0,                            // DEFAULT_KEYWORD,
+    0,                            // WHERE_KEYWORD,
+    0,                            // ELSE_KEYWORD,
+    0,                            // CATCH_KEYWORD,
+    0,                            // AS_KEYWORD,
+    0,                            // AS_QUEST,
+    0,                            // AS_BANG,
+    0,                            // ASYNC_KEYWORD
+};
+
+// Number of entries in RESERVED_OPS.
+#define RESERVED_OP_COUNT 31
+
+// Operator spellings that must never be produced as a CUSTOM_OPERATOR token:
+// eat_operators() refuses to emit a custom operator whose text is exactly one
+// of these.
+const char *RESERVED_OPS[RESERVED_OP_COUNT] = {
+ "/", "=", "-", "+", "!", "*", "%", "<", ">", "&", "|",
+ "^", "?", "~", ".", "..", "->", "/*", "*/", "+=", "-=", "*=",
+ "/=", "%=", ">>", "<<", "++", "--", "===", "...", "..<"};
+
+// Returns true for the operator/keyword tokens in the list below, and false
+// for every other token. BANG is deliberately not in the list.
+static bool is_cross_semi_token(enum TokenType op) {
+  static const enum TokenType CROSS_SEMI_TOKENS[] = {
+      ARROW_OPERATOR,   DOT_OPERATOR,         CONJUNCTION_OPERATOR,
+      DISJUNCTION_OPERATOR, NIL_COALESCING_OPERATOR, EQUAL_SIGN,
+      EQ_EQ,            PLUS_THEN_WS,         MINUS_THEN_WS,
+      THROWS_KEYWORD,   RETHROWS_KEYWORD,     DEFAULT_KEYWORD,
+      WHERE_KEYWORD,    ELSE_KEYWORD,         CATCH_KEYWORD,
+      AS_KEYWORD,       AS_QUEST,             AS_BANG,
+      ASYNC_KEYWORD,    CUSTOM_OPERATOR};
+  const int token_count =
+      (int)(sizeof(CROSS_SEMI_TOKENS) / sizeof(CROSS_SEMI_TOKENS[0]));
+
+  for (int idx = 0; idx < token_count; idx++) {
+    if (CROSS_SEMI_TOKENS[idx] == op) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Characters that, when they are the first non-whitespace character after a
+// newline, suppress the implicit semicolon without being consumed by the
+// scanner (see the end of eat_whitespace()).
+#define NON_CONSUMING_CROSS_SEMI_CHAR_COUNT 3
+const uint32_t
+ NON_CONSUMING_CROSS_SEMI_CHARS[NON_CONSUMING_CROSS_SEMI_CHAR_COUNT] = {
+ '?', ':', '{'};
+
+/**
+ * All possible results of having performed some sort of parsing.
+ *
+ * A parser can return a result along two dimensions:
+ * 1. Should the scanner continue trying to find another result?
+ * 2. Was some result produced by this parsing attempt?
+ *
+ * These are flattened into a single enum together. When the function returns
+ * one of the `TOKEN_FOUND` cases, it will always populate its `symbol_result`
+ * field. When it returns one of the `STOP_PARSING` cases, callers should
+ * immediately return (with the value, if there is one).
+ *
+ * `CONTINUE_PARSING_SLASH_CONSUMED` is the special case where the comment
+ * scanner consumed a `/` that turned out not to start a block comment.
+ * `STOP_PARSING_END_OF_FILE` is returned when a block comment is still open
+ * at a NUL lookahead.
+ */
+enum ParseDirective {
+ CONTINUE_PARSING_NOTHING_FOUND,
+ CONTINUE_PARSING_TOKEN_FOUND,
+ CONTINUE_PARSING_SLASH_CONSUMED,
+ STOP_PARSING_NOTHING_FOUND,
+ STOP_PARSING_TOKEN_FOUND,
+ STOP_PARSING_END_OF_FILE
+};
+
+// Persistent scanner state; it is what serialize()/deserialize() save and
+// restore.
+struct ScannerState {
+ // Hash count of the raw string literal currently being scanned; 0 is
+ // treated as "no raw string in progress" by eat_raw_str_part().
+ uint32_t ongoing_raw_str_hash_count;
+};
+
+// Allocates the scanner state, zero-initialized (hash count = 0).
+//
+// Exactly one ScannerState is allocated. Requesting zero elements from
+// calloc() yields either NULL or a pointer that must not be dereferenced,
+// while reset/serialize/deserialize all access the state through this
+// payload.
+//
+// Returns NULL if the allocation fails.
+void *tree_sitter_swift_external_scanner_create(void) {
+  return calloc(1, sizeof(struct ScannerState));
+}
+
+// Releases the state allocated by the matching create function.
+void tree_sitter_swift_external_scanner_destroy(void *payload) {
+  struct ScannerState *state = (struct ScannerState *)payload;
+  free(state);
+}
+
+// Puts the scanner back into its initial state: no raw string in progress.
+void tree_sitter_swift_external_scanner_reset(void *payload) {
+  ((struct ScannerState *)payload)->ongoing_raw_str_hash_count = 0;
+}
+
+// Writes the scanner state into `buffer` and returns the number of bytes
+// written (always 4). The hash count is stored big-endian: buffer[0] holds
+// the most significant byte, buffer[3] the least significant one.
+unsigned tree_sitter_swift_external_scanner_serialize(void *payload,
+                                                      char *buffer) {
+  const struct ScannerState *state = (const struct ScannerState *)payload;
+  uint32_t remaining = state->ongoing_raw_str_hash_count;
+
+  // Fill from the last byte backwards, peeling off the low byte each time.
+  for (int byte_idx = 3; byte_idx >= 0; byte_idx--) {
+    buffer[byte_idx] = remaining & 0xff;
+    remaining >>= 8;
+  }
+
+  return 4;
+}
+
+// Restores the scanner state from `buffer`, which holds `length` bytes
+// previously produced by the serialize function (a 4-byte big-endian hash
+// count).
+//
+// The state is cleared first, so a short or empty buffer leaves the scanner
+// in its initial state instead of keeping whatever a previous parse left
+// behind.
+void tree_sitter_swift_external_scanner_deserialize(void *payload,
+                                                    const char *buffer,
+                                                    unsigned length) {
+  struct ScannerState *state = (struct ScannerState *)payload;
+  state->ongoing_raw_str_hash_count = 0;
+
+  if (length < 4) {
+    return;
+  }
+
+  // Read the bytes as unsigned. With a signed `char`, a byte >= 0x80 would
+  // be sign-extended by the widening cast and set all higher bits in the
+  // reassembled value.
+  const unsigned char *bytes = (const unsigned char *)buffer;
+  state->ongoing_raw_str_hash_count =
+      (((uint32_t)bytes[0]) << 24) | (((uint32_t)bytes[1]) << 16) |
+      (((uint32_t)bytes[2]) << 8) | ((uint32_t)bytes[3]);
+}
+
+// Moves the lexer past the current lookahead character, passing `false` for
+// the lexer's skip flag.
+static void advance(TSLexer *lexer) {
+  const bool skip = false;
+  lexer->advance(lexer, skip);
+}
+
+// Returns true for whitespace characters and for `;`, which the scanner
+// handles together with whitespace.
+static bool should_treat_as_wspace(int32_t character) {
+  if (character == (int32_t)';') {
+    return true;
+  }
+
+  return iswspace(character) != 0;
+}
+
+// Counts how many of the OPERATOR_COUNT flags in `encountered_operator` are
+// set.
+static int32_t encountered_op_count(bool *encountered_operator) {
+  int32_t live = 0;
+  const bool *const end = encountered_operator + OPERATOR_COUNT;
+
+  for (const bool *flag = encountered_operator; flag != end; ++flag) {
+    live += *flag ? 1 : 0;
+  }
+
+  return live;
+}
+
+// Returns true if any of the RESERVED_OP_COUNT entries carries the marker
+// value 2.
+static bool any_reserved_ops(uint8_t *encountered_reserved_ops) {
+  int idx = 0;
+
+  while (idx < RESERVED_OP_COUNT && encountered_reserved_ops[idx] != 2) {
+    idx++;
+  }
+
+  return idx < RESERVED_OP_COUNT;
+}
+
+// Inclusive code point ranges that are legal at any position of a custom
+// operator, including the first.
+static const int32_t CUSTOM_OP_ANYWHERE_RANGES[][2] = {
+    {0x00A1, 0x00A7}, {0x00A9, 0x00A9}, {0x00AB, 0x00AB}, {0x00AC, 0x00AC},
+    {0x00AE, 0x00AE}, {0x00B0, 0x00B1}, {0x00B6, 0x00B6}, {0x00BB, 0x00BB},
+    {0x00BF, 0x00BF}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x2016, 0x2017},
+    {0x2020, 0x2027}, {0x2030, 0x203E}, {0x2041, 0x2053}, {0x2055, 0x205E},
+    {0x2190, 0x23FF}, {0x2500, 0x2775}, {0x2794, 0x2BFF}, {0x2E00, 0x2E7F},
+    {0x3001, 0x3003}, {0x3008, 0x3020}, {0x3030, 0x3030}};
+
+// Inclusive code point ranges that are legal in a custom operator only after
+// its first character.
+static const int32_t CUSTOM_OP_CONTINUATION_RANGES[][2] = {
+    {0x0300, 0x036F}, {0x1DC0, 0x1DFF}, {0x20D0, 0x20FF},
+    {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xE0100, 0xE01EF}};
+
+// Returns true if `code_point` lies inside one of the `range_count` inclusive
+// ranges in `ranges`.
+static bool custom_op_in_ranges(int32_t code_point, const int32_t (*ranges)[2],
+                                int range_count) {
+  for (int idx = 0; idx < range_count; idx++) {
+    if (code_point >= ranges[idx][0] && code_point <= ranges[idx][1]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Decides whether `cur_char` may appear at position `char_idx` of a custom
+// operator whose first character is `first_char`.
+static bool is_legal_custom_operator(int32_t char_idx, int32_t first_char,
+                                     int32_t cur_char) {
+  const bool at_start = (char_idx == 0);
+
+  // Grammar allows `.` for any operator that starts with `.`
+  if (cur_char == '.') {
+    return at_start || first_char == '.';
+  }
+
+  // Not listed in the grammar, but `/*` and `//` can't be the start of an
+  // operator since they start comments
+  if (cur_char == '*' || cur_char == '/') {
+    return !(char_idx == 1 && first_char == '/');
+  }
+
+  // Remaining ASCII operator characters. The range guard keeps NUL from
+  // matching the string terminator in strchr().
+  if (cur_char > 0 && cur_char < 0x80 &&
+      strchr("=-+!%<>&|^?~", (int)cur_char) != NULL) {
+    return true;
+  }
+
+  const int anywhere_count = (int)(sizeof(CUSTOM_OP_ANYWHERE_RANGES) /
+                                   sizeof(CUSTOM_OP_ANYWHERE_RANGES[0]));
+  if (custom_op_in_ranges(cur_char, CUSTOM_OP_ANYWHERE_RANGES,
+                          anywhere_count)) {
+    return true;
+  }
+
+  const int continuation_count = (int)(sizeof(CUSTOM_OP_CONTINUATION_RANGES) /
+                                       sizeof(CUSTOM_OP_CONTINUATION_RANGES[0]));
+  if (custom_op_in_ranges(cur_char, CUSTOM_OP_CONTINUATION_RANGES,
+                          continuation_count)) {
+    return !at_start;
+  }
+
+  return false;
+}
+
+/**
+ * Tries to scan one of the built-in operators/keywords listed in OPERATORS
+ * or, failing that, a custom operator, starting at the lexer's current
+ * position.
+ *
+ * @param lexer         The lexer to read from; characters are consumed.
+ * @param valid_symbols Which tokens the parser accepts here, indexed by
+ *                      TokenType.
+ * @param mark_end      Whether to call `mark_end` on the lexer at the end of
+ *                      a match.
+ * @param prior_char    If nonzero, a character the caller has already
+ *                      consumed and that counts as the first character of
+ *                      the operator.
+ * @param symbol_result Receives the matched token when true is returned.
+ * @return true if an operator token was recognized, false otherwise.
+ */
+static bool eat_operators(TSLexer *lexer, const bool *valid_symbols,
+                          bool mark_end, const int32_t prior_char,
+                          enum TokenType *symbol_result) {
+  // Per-operator flag: is OPERATORS[i] still a candidate?
+  bool possible_operators[OPERATOR_COUNT];
+  // Per-reserved-operator state: 0 = ruled out, 1 = still a candidate,
+  // 2 = the text read so far (including the lookahead) is exactly that
+  // reserved operator.
+  uint8_t reserved_operators[RESERVED_OP_COUNT];
+  for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
+    possible_operators[op_idx] =
+        valid_symbols[OP_SYMBOLS[op_idx]] &&
+        (!prior_char || OPERATORS[op_idx][0] == prior_char);
+  }
+  for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
+    reserved_operators[op_idx] =
+        !prior_char || RESERVED_OPS[op_idx][0] == prior_char;
+  }
+
+  bool possible_custom_operator = valid_symbols[CUSTOM_OPERATOR];
+  int32_t first_char = prior_char ? prior_char : lexer->lookahead;
+  int32_t last_examined_char = first_char;
+
+  // Index of the character currently being compared within each candidate.
+  int32_t str_idx = prior_char ? 1 : 0;
+  int32_t full_match = -1;
+  while (true) {
+    for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
+      if (!possible_operators[op_idx]) {
+        continue;
+      }
+
+      if (OPERATORS[op_idx][str_idx] == '\0') {
+        // Make sure that the operator is allowed to have the next character as
+        // its lookahead.
+        enum IllegalTerminatorGroup illegal_terminators =
+            OP_ILLEGAL_TERMINATORS[op_idx];
+        switch (lexer->lookahead) {
+        // See "Operators":
+        // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID418
+        case '/':
+        case '=':
+        case '-':
+        case '+':
+        case '!':
+        case '*':
+        case '%':
+        case '<':
+        case '>':
+        case '&':
+        case '|':
+        case '^':
+        case '?':
+        case '~':
+          if (illegal_terminators == OPERATOR_SYMBOLS) {
+            break;
+          } // Otherwise, intentionally fall through to the OPERATOR_OR_DOT case
+          // fall through
+        case '.':
+          if (illegal_terminators == OPERATOR_OR_DOT) {
+            break;
+          } // Otherwise, fall through to DEFAULT which checks its groups
+            // directly
+          // fall through
+        default:
+          if (iswalnum(lexer->lookahead) &&
+              illegal_terminators == ALPHANUMERIC) {
+            break;
+          }
+
+          if (!iswspace(lexer->lookahead) &&
+              illegal_terminators == NON_WHITESPACE) {
+            break;
+          }
+
+          full_match = op_idx;
+          if (mark_end) {
+            lexer->mark_end(lexer);
+          }
+        }
+
+        possible_operators[op_idx] = false;
+        continue;
+      }
+
+      if (OPERATORS[op_idx][str_idx] != lexer->lookahead) {
+        possible_operators[op_idx] = false;
+        continue;
+      }
+    }
+
+    for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
+      if (!reserved_operators[op_idx]) {
+        continue;
+      }
+
+      if (RESERVED_OPS[op_idx][str_idx] == '\0') {
+        reserved_operators[op_idx] = 0;
+        continue;
+      }
+
+      if (RESERVED_OPS[op_idx][str_idx] != lexer->lookahead) {
+        reserved_operators[op_idx] = 0;
+        continue;
+      }
+
+      if (RESERVED_OPS[op_idx][str_idx + 1] == '\0') {
+        reserved_operators[op_idx] = 2;
+        continue;
+      }
+    }
+
+    possible_custom_operator =
+        possible_custom_operator &&
+        is_legal_custom_operator(str_idx, first_char, lexer->lookahead);
+
+    uint32_t encountered_ops = encountered_op_count(possible_operators);
+    if (encountered_ops == 0) {
+      if (!possible_custom_operator) {
+        break;
+      } else if (mark_end && full_match == -1) {
+        lexer->mark_end(lexer);
+      }
+    }
+
+    last_examined_char = lexer->lookahead;
+    lexer->advance(lexer, false);
+    str_idx += 1;
+
+    if (encountered_ops == 0 &&
+        !is_legal_custom_operator(str_idx, first_char, lexer->lookahead)) {
+      break;
+    }
+  }
+
+  if (full_match != -1) {
+    // We have a match -- first see if that match has a symbol that suppresses
+    // it. For example, in `try!`, we do not want to emit the `!` as a symbol in
+    // our scanner, because we want the parser to have the chance to parse it as
+    // an immediate token.
+    uint64_t suppressing_symbols = OP_SYMBOL_SUPPRESSOR[full_match];
+    if (suppressing_symbols) {
+      for (uint64_t suppressor = 0; suppressor < TOKEN_COUNT; suppressor++) {
+        // The probe bit must be 64 bits wide. With a plain `1` the shift is
+        // done in `int`: it is undefined for a count of 32, and for a count
+        // of 31 the negative result sign-extends so that bit 32 looks set,
+        // making this loop consult the wrong `valid_symbols` entry.
+        if (!(suppressing_symbols & ((uint64_t)1 << suppressor))) {
+          continue;
+        }
+
+        // The suppressing symbol is valid in this position, so skip it.
+        if (valid_symbols[suppressor]) {
+          return false;
+        }
+      }
+    }
+    *symbol_result = OP_SYMBOLS[full_match];
+    return true;
+  }
+
+  if (possible_custom_operator && !any_reserved_ops(reserved_operators)) {
+    if ((last_examined_char != '<' || iswspace(lexer->lookahead)) && mark_end) {
+      lexer->mark_end(lexer);
+    }
+    *symbol_result = CUSTOM_OPERATOR;
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Tries to scan a (possibly nested) block comment at the current position.
+ *
+ * Results:
+ * - CONTINUE_PARSING_NOTHING_FOUND: the lookahead is not `/`; nothing was
+ *   consumed.
+ * - CONTINUE_PARSING_SLASH_CONSUMED: a `/` was consumed but no `*` follows,
+ *   so this is not a block comment.
+ * - STOP_PARSING_TOKEN_FOUND: a complete comment was consumed and
+ *   `*symbol_result` was set to BLOCK_COMMENT.
+ * - STOP_PARSING_END_OF_FILE: a NUL lookahead was reached while the comment
+ *   was still open.
+ *
+ * `mark_end` controls whether the lexer's token end is marked after the
+ * closing delimiter. `valid_symbols` is not consulted.
+ */
+static enum ParseDirective eat_comment(TSLexer *lexer,
+ const bool *valid_symbols, bool mark_end,
+ enum TokenType *symbol_result) {
+ if (lexer->lookahead != '/') {
+ return CONTINUE_PARSING_NOTHING_FOUND;
+ }
+
+ advance(lexer);
+
+ if (lexer->lookahead != '*') {
+ return CONTINUE_PARSING_SLASH_CONSUMED;
+ }
+
+ advance(lexer);
+
+ // `after_star` is true when the previously consumed character was `*`, so
+ // that a following `/` closes a comment. `nesting_depth` counts the
+ // currently open `/*`.
+ bool after_star = false;
+ unsigned nesting_depth = 1;
+ for (;;) {
+ switch (lexer->lookahead) {
+ case '\0':
+ return STOP_PARSING_END_OF_FILE;
+ case '*':
+ advance(lexer);
+ after_star = true;
+ break;
+ case '/':
+ if (after_star) {
+ // `*/`: close one nesting level; the outermost one ends the token.
+ advance(lexer);
+ after_star = false;
+ nesting_depth--;
+ if (nesting_depth == 0) {
+ if (mark_end) {
+ lexer->mark_end(lexer);
+ }
+ *symbol_result = BLOCK_COMMENT;
+ return STOP_PARSING_TOKEN_FOUND;
+ }
+ } else {
+ // A `/` not preceded by `*`: if a `*` follows, a nested comment opens.
+ advance(lexer);
+ after_star = false;
+ if (lexer->lookahead == '*') {
+ nesting_depth++;
+ advance(lexer);
+ }
+ }
+ break;
+ default:
+ advance(lexer);
+ after_star = false;
+ break;
+ }
+ }
+}
+
+/**
+ * Skips whitespace and decides whether it amounts to a statement separator.
+ *
+ * Whitespace characters are skipped one at a time. A `;` ends the loop; it is
+ * consumed only when both IMPLICIT_SEMI and EXPLICIT_SEMI are valid. Seeing a
+ * newline records a potential implicit semicolon. If the first character
+ * after such a newline is `/`, the function looks past any comments to see
+ * whether an operator follows; an operator suppresses the semicolon. The
+ * characters in NON_CONSUMING_CROSS_SEMI_CHARS suppress it as well.
+ *
+ * When a semicolon token is produced, `*symbol_result` is EXPLICIT_SEMI or
+ * IMPLICIT_SEMI. A block comment found directly after the newline is
+ * returned as STOP_PARSING_TOKEN_FOUND with the comment's token in
+ * `*symbol_result`.
+ */
+static enum ParseDirective eat_whitespace(TSLexer *lexer,
+ const bool *valid_symbols,
+ enum TokenType *symbol_result) {
+ enum ParseDirective ws_directive = CONTINUE_PARSING_NOTHING_FOUND;
+ bool semi_is_valid =
+ valid_symbols[IMPLICIT_SEMI] && valid_symbols[EXPLICIT_SEMI];
+ // After the loop, `lookahead` holds the character that ended it: either
+ // `;` or the first character that is not treated as whitespace.
+ uint32_t lookahead;
+ while (should_treat_as_wspace(lookahead = lexer->lookahead)) {
+ if (lookahead == ';') {
+ if (semi_is_valid) {
+ ws_directive = STOP_PARSING_TOKEN_FOUND;
+ lexer->advance(lexer, false);
+ }
+
+ break;
+ }
+
+ lexer->advance(lexer, true);
+
+ lexer->mark_end(lexer);
+
+ if (ws_directive == CONTINUE_PARSING_NOTHING_FOUND &&
+ (lookahead == '\n' || lookahead == '\r')) {
+ ws_directive = CONTINUE_PARSING_TOKEN_FOUND;
+ }
+ }
+
+ enum ParseDirective any_comment = CONTINUE_PARSING_NOTHING_FOUND;
+ if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND && lookahead == '/') {
+ bool has_seen_single_comment = false;
+ while (lexer->lookahead == '/') {
+ // It's possible that this is a comment - start an exploratory mission to
+ // find out, and if it is, look for what comes after it. We care about
+ // what comes after it for the purpose of suppressing the newline.
+
+ enum TokenType multiline_comment_result;
+ any_comment = eat_comment(lexer, valid_symbols, /* mark_end */ false,
+ &multiline_comment_result);
+ if (any_comment == STOP_PARSING_TOKEN_FOUND) {
+ // This is a multiline comment. This scanner should be parsing those, so
+ // we might want to bail out and emit it instead. However, we only want
+ // to do that if we haven't advanced through a _single_ line comment on
+ // the way - otherwise that will get lumped into this.
+ if (!has_seen_single_comment) {
+ lexer->mark_end(lexer);
+ *symbol_result = multiline_comment_result;
+ return STOP_PARSING_TOKEN_FOUND;
+ }
+ } else if (any_comment == STOP_PARSING_END_OF_FILE) {
+ return STOP_PARSING_END_OF_FILE;
+ } else if (any_comment == CONTINUE_PARSING_SLASH_CONSUMED) {
+ // We accidentally ate a slash -- we should actually bail out, say we
+ // saw nothing, and let the next pass take it from after the newline.
+ return CONTINUE_PARSING_SLASH_CONSUMED;
+ } else if (lexer->lookahead == '/') {
+ // There wasn't a multiline comment, which we know means that the
+ // comment parser ate its `/` and then bailed out. If it had seen
+ // anything comment-like after that first `/` it would have continued
+ // going and eventually had a well-formed comment or an EOF. Thus, if
+ // we're currently looking at a `/`, it's the second one of those and it
+ // means we have a single-line comment.
+ has_seen_single_comment = true;
+ while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
+ lexer->advance(lexer, true);
+ }
+ } else if (iswspace(lexer->lookahead)) {
+ // We didn't see any type of comment - in fact, we saw an operator that
+ // we don't normally treat as an operator. Still, this is a reason to
+ // stop parsing.
+ return STOP_PARSING_NOTHING_FOUND;
+ }
+
+ // If we skipped through some comment, we're at whitespace now, so
+ // advance.
+ while (iswspace(lexer->lookahead)) {
+ any_comment = CONTINUE_PARSING_NOTHING_FOUND; // We're advancing, so
+ // clear out the comment
+ lexer->advance(lexer, true);
+ }
+ }
+
+ enum TokenType operator_result;
+ bool saw_operator =
+ eat_operators(lexer, valid_symbols,
+ /* mark_end */ false, '\0', &operator_result);
+ if (saw_operator) {
+ // The operator we saw should suppress the newline, so bail out.
+ return STOP_PARSING_NOTHING_FOUND;
+ } else {
+ // Promote the implicit newline to an explicit one so we don't check for
+ // operators again.
+ *symbol_result = IMPLICIT_SEMI;
+ ws_directive = STOP_PARSING_TOKEN_FOUND;
+ }
+ }
+
+ // Let's consume operators that can live after a "semicolon" style newline.
+ // Before we do that, though, we want to check for a set of characters that we
+ // do not consume, but that still suppress the semi.
+ if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND) {
+ for (int i = 0; i < NON_CONSUMING_CROSS_SEMI_CHAR_COUNT; i++) {
+ if (NON_CONSUMING_CROSS_SEMI_CHARS[i] == lookahead) {
+ return CONTINUE_PARSING_NOTHING_FOUND;
+ }
+ }
+ }
+
+ // `lookahead` still holds the character that ended the whitespace loop, so
+ // a `;` there means the semicolon was written explicitly.
+ if (semi_is_valid && ws_directive != CONTINUE_PARSING_NOTHING_FOUND) {
+ *symbol_result = lookahead == ';' ? EXPLICIT_SEMI : IMPLICIT_SEMI;
+ return ws_directive;
+ }
+
+ return CONTINUE_PARSING_NOTHING_FOUND;
+}
+
+#define DIRECTIVE_COUNT 4
+// Spellings of the compiler directives looked for after a `#` (the `#`
+// itself is not part of the spelling). Index-aligned with DIRECTIVE_SYMBOLS.
+// Sized by DIRECTIVE_COUNT, which is the bound every loop over this table
+// uses.
+const char *DIRECTIVES[DIRECTIVE_COUNT] = {"if", "elseif", "else", "endif"};
+
+// Token reported for the directive stored at the same index in DIRECTIVES.
+const enum TokenType DIRECTIVE_SYMBOLS[DIRECTIVE_COUNT] = {
+    DIRECTIVE_IF, DIRECTIVE_ELSEIF, DIRECTIVE_ELSE, DIRECTIVE_ENDIF};
+
+// Matches the characters at the lexer's current position against the
+// spellings in DIRECTIVES and reports which directive, if any, was found.
+//
+// All directives start out as candidates; each step compares the next
+// expected character of every remaining candidate with `lexer->lookahead` and
+// drops the ones that disagree. Whenever a candidate's spelling has been
+// fully consumed, the token end is marked at that position and the candidate
+// is remembered as the match, so a later (longer) spelling overrides an
+// earlier (shorter) one, e.g. `elseif` wins over `else`.
+//
+// Returns the DIRECTIVE_SYMBOLS entry of the last complete match, or
+// HASH_SYMBOL when no spelling matched completely. In the latter case this
+// function never calls `mark_end`, so the token end stays wherever the caller
+// put it.
+static enum TokenType find_possible_compiler_directive(TSLexer *lexer) {
+  bool possible_directives[DIRECTIVE_COUNT];
+  for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
+    possible_directives[dir_idx] = true;
+  }
+
+  int32_t str_idx = 0;
+  int32_t full_match = -1;
+  while (true) {
+    bool any_candidate_left = false;
+    for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
+      if (!possible_directives[dir_idx]) {
+        continue;
+      }
+
+      uint8_t expected_char = DIRECTIVES[dir_idx][str_idx];
+      if (expected_char == '\0') {
+        // The whole spelling has been consumed: record the match and end the
+        // token here. The candidate must be retired now. Otherwise, when the
+        // lookahead is also `\0` (end of input), it would survive and the
+        // next iteration would index past the spelling's terminator.
+        full_match = dir_idx;
+        lexer->mark_end(lexer);
+        possible_directives[dir_idx] = false;
+        continue;
+      }
+
+      if (expected_char != lexer->lookahead) {
+        possible_directives[dir_idx] = false;
+        continue;
+      }
+
+      any_candidate_left = true;
+    }
+
+    if (!any_candidate_left) {
+      break;
+    }
+
+    lexer->advance(lexer, false);
+    str_idx += 1;
+  }
+
+  if (full_match == -1) {
+    // No compiler directive found, so just match the starting symbol
+    return HASH_SYMBOL;
+  }
+
+  return DIRECTIVE_SYMBOLS[full_match];
+}
+
+// Scans one part of a raw string literal, or a `#`-prefixed compiler
+// directive when a single `#` is not followed by `"`.
+//
+// `state->ongoing_raw_str_hash_count` carries the number of hashes that
+// opened the raw string currently being scanned (0 when none is in progress).
+// It is set when a part ends at an interpolation and reset to 0 when the
+// string's closing delimiter is consumed.
+//
+// Returns true and writes `*symbol_result` when a token was produced:
+//   - RAW_STR_PART: the part stops right before a `\#...#(` interpolation.
+//   - RAW_STR_END_PART: the part ends with the closing `"#...#`.
+//   - whatever find_possible_compiler_directive returns, for a lone `#`.
+// Returns false otherwise. Note that characters may already have been
+// advanced over when false is returned.
+static bool eat_raw_str_part(struct ScannerState *state, TSLexer *lexer,
+                             const bool *valid_symbols,
+                             enum TokenType *symbol_result) {
+  uint32_t hash_count = state->ongoing_raw_str_hash_count;
+  if (!valid_symbols[RAW_STR_PART]) {
+    return false;
+  } else if (hash_count == 0) {
+    // If this is a raw_str_part, it's the first one - look for hashes
+    while (lexer->lookahead == '#') {
+      hash_count += 1;
+      advance(lexer);
+    }
+
+    if (hash_count == 0) {
+      return false;
+    }
+
+    if (lexer->lookahead == '"') {
+      advance(lexer);
+    } else if (hash_count == 1) {
+      // A single `#` without a quote: end the token after the `#` for now and
+      // let the directive matcher extend it if a directive follows.
+      lexer->mark_end(lexer);
+      *symbol_result = find_possible_compiler_directive(lexer);
+      return true;
+    } else {
+      return false;
+    }
+
+  } else if (valid_symbols[RAW_STR_CONTINUING_INDICATOR]) {
+    // This is the end of an interpolation - now it's another raw_str_part. This
+    // is a synthetic marker to tell us that the grammar just consumed a `(`
+    // symbol to close a raw interpolation (since we don't want to fire on every
+    // `(` in existence). We don't have anything to do except continue.
+  } else {
+    return false;
+  }
+
+  // We're in a state where anything other than `hash_count` hash symbols in a
+  // row should be eaten and is part of a string. The last character _before_
+  // the hashes will tell us what happens next. Matters are also complicated by
+  // the fact that we don't want to consume every character we visit; if we see
+  // a `\#(`, for instance, with the appropriate number of hash symbols, we want
+  // to end our parsing _before_ that sequence. This allows highlighting tools
+  // to treat that as a separate token.
+  while (lexer->lookahead != '\0') {
+    // Keep the full code point: `lexer->lookahead` is an int32_t, and
+    // narrowing it to a byte would make unrelated characters whose low byte
+    // happens to be 0x5C or 0x22 look like `\` or `"`.
+    int32_t last_char = '\0';
+    lexer->mark_end(
+        lexer); // We always want to parse through the start of the string so
+                // far
+    // Advance through anything that isn't a hash symbol, because we want to
+    // count those.
+    while (lexer->lookahead != '#' && lexer->lookahead != '\0') {
+      last_char = lexer->lookahead;
+      advance(lexer);
+      if (last_char != '\\' || lexer->lookahead == '\\') {
+        // Mark a new end, but only if we didn't just advance past a `\` symbol,
+        // since we don't want to consume that. Exception: if this is a `\` that
+        // happens _right after_ another `\`, we for some reason _do_ want to
+        // consume that, because apparently that is parsed as a literal `\`
+        // followed by something escaped.
+        lexer->mark_end(lexer);
+      }
+    }
+
+    // We hit at least one hash - count them and see if they match.
+    uint32_t current_hash_count = 0;
+    while (lexer->lookahead == '#' && current_hash_count < hash_count) {
+      current_hash_count += 1;
+      advance(lexer);
+    }
+
+    // If we saw exactly the right number of hashes, one of three things is
+    // true:
+    // 1. We're trying to interpolate into this string.
+    // 2. The string just ended.
+    // 3. This was just some hash characters doing nothing important.
+    if (current_hash_count == hash_count) {
+      if (last_char == '\\' && lexer->lookahead == '(') {
+        // Interpolation case! Don't consume those chars; they get saved for
+        // grammar.js.
+        *symbol_result = RAW_STR_PART;
+        state->ongoing_raw_str_hash_count = hash_count;
+        return true;
+      } else if (last_char == '"') {
+        // The string is finished! Mark the end here, on the very last hash
+        // symbol.
+        lexer->mark_end(lexer);
+        *symbol_result = RAW_STR_END_PART;
+        state->ongoing_raw_str_hash_count = 0;
+        return true;
+      }
+      // Nothing special happened - let the string continue.
+    }
+  }
+
+  return false;
+}
+
+// External-scanner entry point: tries, in a fixed order, to recognize
+// whitespace, a comment, an operator and finally a raw string part at the
+// lexer's current position.
+//
+// `payload` is the scanner's `struct ScannerState`. Returns true with
+// `lexer->result_symbol` set when a token was recognized, false otherwise.
+bool tree_sitter_swift_external_scanner_scan(void *payload, TSLexer *lexer,
+                                             const bool *valid_symbols) {
+  struct ScannerState *state = (struct ScannerState *)payload;
+
+  // Step 1: whitespace. It can finish the scan outright, in either direction.
+  enum TokenType ws_result;
+  enum ParseDirective ws_directive =
+      eat_whitespace(lexer, valid_symbols, &ws_result);
+  switch (ws_directive) {
+  case STOP_PARSING_TOKEN_FOUND:
+    lexer->result_symbol = ws_result;
+    return true;
+  case STOP_PARSING_NOTHING_FOUND:
+  case STOP_PARSING_END_OF_FILE:
+    return false;
+  default:
+    break;
+  }
+
+  // A whitespace token may be pending; an operator found below can still take
+  // precedence over it.
+  bool has_ws_result = (ws_directive == CONTINUE_PARSING_TOKEN_FOUND);
+
+  // Step 2: comments. These go before custom operators so that those aren't
+  // treated as comments. If the whitespace step already consumed a `/`, its
+  // directive is carried over instead of scanning for a comment again.
+  enum TokenType comment_result;
+  enum ParseDirective comment = ws_directive;
+  if (ws_directive != CONTINUE_PARSING_SLASH_CONSUMED) {
+    comment = eat_comment(lexer, valid_symbols, /* mark_end */ true,
+                          &comment_result);
+  }
+
+  if (comment == STOP_PARSING_TOKEN_FOUND) {
+    lexer->mark_end(lexer);
+    lexer->result_symbol = comment_result;
+    return true;
+  }
+  if (comment == STOP_PARSING_END_OF_FILE) {
+    return false;
+  }
+
+  // Step 3: operators, which might cause the pending whitespace token to be
+  // suppressed.
+  enum TokenType operator_result;
+  bool saw_operator =
+      eat_operators(lexer, valid_symbols,
+                    /* mark_end */ !has_ws_result,
+                    comment == CONTINUE_PARSING_SLASH_CONSUMED ? '/' : '\0',
+                    &operator_result);
+
+  if (has_ws_result) {
+    if (saw_operator && is_cross_semi_token(operator_result)) {
+      // The operator wins over the pending whitespace token; the end was not
+      // marked while scanning it, so mark it now.
+      lexer->result_symbol = operator_result;
+      lexer->mark_end(lexer);
+      return true;
+    }
+
+    // Don't `mark_end`, since we may have advanced through some operators.
+    lexer->result_symbol = ws_result;
+    return true;
+  }
+
+  if (saw_operator) {
+    lexer->result_symbol = operator_result;
+    return true;
+  }
+
+  // Step 4: raw string parts.
+  // NOTE: this will consume any `#` characters it sees, even if it does not
+  // find a result. Keep it at the end so that it doesn't interfere with special
+  // literals or selectors!
+  enum TokenType raw_str_result;
+  if (eat_raw_str_part(state, lexer, valid_symbols, &raw_str_result)) {
+    lexer->result_symbol = raw_str_result;
+    return true;
+  }
+
+  return false;
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/tree-sitter.json b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/tree-sitter.json
new file mode 100644
index 0000000000000..b32e04e0424d4
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/Swift/tree-sitter-swift/tree-sitter.json
@@ -0,0 +1,39 @@
+{
+ "grammars": [
+ {
+ "name": "swift",
+ "camelcase": "Swift",
+ "scope": "source.swift",
+ "path": ".",
+ "file-types": [
+ "swift"
+ ],
+ "highlights": "queries/highlights.scm",
+ "injections": "queries/injections.scm",
+ "locals": "queries/locals.scm",
+ "injection-regex": "swift"
+ }
+ ],
+ "metadata": {
+ "version": "0.7.1",
+ "license": "MIT",
+ "description": "A tree-sitter grammar for the Swift programming language.",
+ "authors": [
+ {
+ "name": "Alex Pinkus",
+ "email": "alex.pinkus at gmail.com"
+ }
+ ],
+ "links": {
+ "repository": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
+ }
+ },
+ "bindings": {
+ "c": true,
+ "go": true,
+ "node": true,
+ "python": true,
+ "rust": true,
+ "swift": true
+ }
+}
diff --git a/lldb/unittests/Highlighter/CMakeLists.txt b/lldb/unittests/Highlighter/CMakeLists.txt
index 255f7bccf3e73..a4fc679cc931f 100644
--- a/lldb/unittests/Highlighter/CMakeLists.txt
+++ b/lldb/unittests/Highlighter/CMakeLists.txt
@@ -1,3 +1,7 @@
+if (LLDB_ENABLE_TREESITTER)
+ set(SWIFT_HIGHLIGHTER_PLUGIN lldbPluginHighlighterTreeSitterSwift)
+endif()
+
add_lldb_unittest(HighlighterTests
HighlighterTest.cpp
@@ -7,4 +11,5 @@ add_lldb_unittest(HighlighterTests
lldbPluginCPlusPlusLanguage
lldbPluginObjCLanguage
lldbPluginObjCPlusPlusLanguage
+ ${SWIFT_HIGHLIGHTER_PLUGIN}
)
diff --git a/lldb/unittests/Highlighter/HighlighterTest.cpp b/lldb/unittests/Highlighter/HighlighterTest.cpp
index 6c0c5694967ce..9c7ee9de7bccb 100644
--- a/lldb/unittests/Highlighter/HighlighterTest.cpp
+++ b/lldb/unittests/Highlighter/HighlighterTest.cpp
@@ -16,6 +16,10 @@
#include "lldb/Core/Highlighter.h"
#include "lldb/Host/FileSystem.h"
+#if LLDB_ENABLE_TREESITTER
+#include "Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h"
+#endif
+
#include "TestingSupport/SubsystemRAII.h"
#include <optional>
@@ -25,8 +29,12 @@ namespace {
class HighlighterTest : public testing::Test {
// We need the language plugins for detecting the language based on the
// filename.
- SubsystemRAII<FileSystem, ClangHighlighter, DefaultHighlighter,
- CPlusPlusLanguage, ObjCLanguage, ObjCPlusPlusLanguage>
+ SubsystemRAII<FileSystem, ClangHighlighter,
+#if LLDB_ENABLE_TREESITTER
+ SwiftTreeSitterHighlighter,
+#endif
+ DefaultHighlighter, CPlusPlusLanguage, ObjCLanguage,
+ ObjCPlusPlusLanguage>
subsystems;
};
} // namespace
@@ -322,3 +330,22 @@ TEST_F(HighlighterTest, ClangCursorPosInOtherToken) {
EXPECT_EQ(" <id><c>foo</c></id> <id>c</id> = <id>bar</id>(); return 1;",
highlightC(" foo c = bar(); return 1;", s, 3));
}
+
+#if LLDB_ENABLE_TREESITTER
+// Runs `code` through the highlighter selected for Swift and "main.swift".
+static std::string
+highlightSwift(llvm::StringRef code, HighlightStyle style,
+               std::optional<size_t> cursor = std::optional<size_t>()) {
+  HighlighterManager manager;
+  return manager.getHighlighterFor(lldb::eLanguageTypeSwift, "main.swift")
+      .Highlight(style, code, cursor);
+}
+
+TEST_F(HighlighterTest, SwiftComments) {
+  // Only the comment style gets markers, so they show where a comment was
+  // recognized.
+  HighlightStyle style;
+  style.comment.Set("<cc>", "</cc>");
+
+  const std::string highlighted =
+      highlightSwift(" // I'm feeling lucky today", style);
+  EXPECT_EQ(" <cc>// I'm feeling lucky today</cc>", highlighted);
+}
+#endif
More information about the llvm-branch-commits
mailing list