[Lldb-commits] [lldb] [lldb] Add tree-sitter based syntax highlighting (PR #181279)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Thu Feb 12 16:46:00 PST 2026
https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/181279
>From 3a08e2aa927886dbd7b4d133a0b6a376d69d3094 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 12 Feb 2026 16:23:00 -0800
Subject: [PATCH 1/2] [lldb] Add tree-sitter based syntax highlighting
This adds the necessary infrastructure to use tree-sitter for syntax
highlighting in LLDB. It provides the base class for a tree-sitter
highlighter plugin. Its primary function is interfacing with the
tree-sitter library, and converting captures to highlighting styles.
Adding a new tree-sitter highlighter consists of creating an LLDB plugin
that inherits from this class. The plugin has two core responsibilities:
1. Loading the tree-sitter grammar.
2. Specifying the tree-sitter syntax highlighting query.
Everything else is handled by the base class, making it extremely easy
to add a new language.
For more context and the motivation behind using tree-sitter for syntax
highlighting, see #170250.
---
lldb/cmake/modules/FindTreeSitter.cmake | 18 ++
lldb/cmake/modules/LLDBConfig.cmake | 1 +
.../source/Plugins/Highlighter/CMakeLists.txt | 3 +
.../Highlighter/TreeSitter/CMakeLists.txt | 10 +
.../TreeSitter/TreeSitterHighlighter.cpp | 231 ++++++++++++++++++
.../TreeSitter/TreeSitterHighlighter.h | 77 ++++++
6 files changed, 340 insertions(+)
create mode 100644 lldb/cmake/modules/FindTreeSitter.cmake
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
diff --git a/lldb/cmake/modules/FindTreeSitter.cmake b/lldb/cmake/modules/FindTreeSitter.cmake
new file mode 100644
index 0000000000000..04a40507cc9ab
--- /dev/null
+++ b/lldb/cmake/modules/FindTreeSitter.cmake
@@ -0,0 +1,18 @@
+# FindTreeSitter.cmake
+#
+# Find module for the tree-sitter parsing library.
+#
+# Variables filled in by this module:
+#   TreeSitter_INCLUDE_DIR - directory that contains tree_sitter/api.h
+#   TreeSitter_LIBRARY     - the tree-sitter library to link against
+
+include(FindPackageHandleStandardArgs)
+
+# Look for the directory holding the public C API header.
+find_path(TreeSitter_INCLUDE_DIR
+ NAMES tree_sitter/api.h)
+
+# The library is searched for under both the hyphenated and the plain name.
+find_library(TreeSitter_LIBRARY
+ NAMES tree-sitter treesitter)
+
+# The package only counts as found when both the library and the header
+# directory were located.
+find_package_handle_standard_args(TreeSitter
+ REQUIRED_VARS TreeSitter_LIBRARY TreeSitter_INCLUDE_DIR
+)
+
+# Keep the two result variables out of the default (non-advanced) cache view.
+mark_as_advanced(
+ TreeSitter_INCLUDE_DIR
+ TreeSitter_LIBRARY
+)
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index d4471b8a5418d..f00824d692fe1 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -64,6 +64,7 @@ add_optional_dependency(LLDB_ENABLE_LUA "Enable Lua scripting support in LLDB" L
add_optional_dependency(LLDB_ENABLE_PYTHON "Enable Python scripting support in LLDB" PythonAndSwig PYTHONANDSWIG_FOUND)
add_optional_dependency(LLDB_ENABLE_LIBXML2 "Enable Libxml 2 support in LLDB" LibXml2 LIBXML2_FOUND VERSION ${LLDB_LIBXML2_VERSION})
add_optional_dependency(LLDB_ENABLE_FBSDVMCORE "Enable libfbsdvmcore support in LLDB" FBSDVMCore FBSDVMCore_FOUND QUIET)
+# Tree-sitter is looked up through lldb/cmake/modules/FindTreeSitter.cmake.
+add_optional_dependency(LLDB_ENABLE_TREESITTER "Enable Tree-sitter syntax highlighting" TreeSitter TREESITTER_FOUND)
option(LLDB_USE_ENTITLEMENTS "When codesigning, use entitlements if available" ON)
option(LLDB_BUILD_FRAMEWORK "Build LLDB.framework (Darwin only)" OFF)
diff --git a/lldb/source/Plugins/Highlighter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/CMakeLists.txt
index 88033867b1a12..704ea5f145794 100644
--- a/lldb/source/Plugins/Highlighter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/CMakeLists.txt
@@ -1,2 +1,5 @@
+# The tree-sitter highlighter is only built when LLDB_ENABLE_TREESITTER is set.
+if (LLDB_ENABLE_TREESITTER)
+ add_subdirectory(TreeSitter)
+endif()
add_subdirectory(Clang)
add_subdirectory(Default)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
new file mode 100644
index 0000000000000..99c03da999720
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_lldb_library(lldbTreeSitter
+  TreeSitterHighlighter.cpp
+
+  LINK_COMPONENTS
+    Support
+    ADT
+  LINK_LIBS
+    lldbUtility
+    ${TreeSitter_LIBRARY}
+)
+
+# TreeSitterHighlighter.h includes <tree_sitter/api.h>. The header directory
+# located by FindTreeSitter.cmake is not necessarily on the compiler's default
+# search path, so add it explicitly. It is PUBLIC because the header is meant
+# to be included by the language plugins that derive from the base class.
+target_include_directories(lldbTreeSitter PUBLIC ${TreeSitter_INCLUDE_DIR})
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
new file mode 100644
index 0000000000000..181dc90683597
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
@@ -0,0 +1,231 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TreeSitterHighlighter.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/StreamString.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+using namespace lldb_private;
+
+// Releases whichever tree-sitter objects this state managed to create.
+TreeSitterHighlighter::TSState::~TSState() {
+  if (query != nullptr)
+    ts_query_delete(query);
+
+  if (parser != nullptr)
+    ts_parser_delete(parser);
+}
+
+// A state is usable only when both the parser and the query exist.
+TreeSitterHighlighter::TSState::operator bool() const {
+  const bool has_parser = parser != nullptr;
+  const bool has_query = query != nullptr;
+  return has_parser && has_query;
+}
+
+// Returns the cached tree-sitter state, building it on the first call. A
+// state that failed to initialize is cached as well and converts to false, so
+// initialization is attempted only once.
+TreeSitterHighlighter::TSState &TreeSitterHighlighter::GetTSState() const {
+  if (m_ts_state)
+    return *m_ts_state;
+
+  Log *log = GetLog(LLDBLog::Source);
+  TSState &state = m_ts_state.emplace();
+
+  // Step 1: the parser.
+  state.parser = ts_parser_new();
+  if (state.parser == nullptr) {
+    LLDB_LOG(log, "Creating tree-sitter parser failed for {0}", GetName());
+    return state;
+  }
+
+  // Step 2: the grammar supplied by the concrete highlighter.
+  const TSLanguage *language = GetLanguage();
+  const bool language_ok =
+      language != nullptr && ts_parser_set_language(state.parser, language);
+  if (!language_ok) {
+    LLDB_LOG(log, "Creating tree-sitter language failed for {0}", GetName());
+    return state;
+  }
+
+  // Step 3: the highlight query supplied by the concrete highlighter.
+  const llvm::StringRef query_source = GetHighlightQuery();
+  uint32_t error_offset = 0;
+  TSQueryError error_type = TSQueryErrorNone;
+  state.query = ts_query_new(language, query_source.data(),
+                             static_cast<uint32_t>(query_source.size()),
+                             &error_offset, &error_type);
+  if (state.query != nullptr && error_type == TSQueryErrorNone)
+    return state;
+
+  LLDB_LOG(log,
+           "Creating tree-sitter query failed for {0} with error {1}: {2}",
+           GetName(), error_type, query_source.substr(error_offset, 64));
+
+  // Replace the half-built state with an empty one. This destroys the old
+  // state (releasing the parser and any query) and guarantees the cached
+  // state does not look valid.
+  return m_ts_state.emplace();
+}
+
+// Translates a tree-sitter capture name into the matching style of `options`.
+// Returns nullptr for capture names that have no associated style.
+const HighlightStyle::ColorStyle *
+TreeSitterHighlighter::GetStyleForCapture(llvm::StringRef capture_name,
+                                          const HighlightStyle &options) const {
+  // True if the capture name equals any of the given names.
+  const auto is_one_of =
+      [capture_name](std::initializer_list<llvm::StringRef> names) {
+        for (llvm::StringRef name : names)
+          if (capture_name == name)
+            return true;
+        return false;
+      };
+
+  if (capture_name == "comment")
+    return &options.comment;
+
+  // Types are rendered with the keyword style.
+  if (is_one_of({"keyword", "type"}))
+    return &options.keyword;
+
+  if (capture_name == "operator")
+    return &options.operators;
+
+  // Delimiters.
+  if (capture_name == "punctuation.delimiter.comma")
+    return &options.comma;
+  if (capture_name == "punctuation.delimiter.colon")
+    return &options.colon;
+  if (capture_name == "punctuation.delimiter.semicolon")
+    return &options.semicolons;
+
+  // Brackets.
+  if (capture_name == "punctuation.bracket.square")
+    return &options.square_brackets;
+  if (is_one_of({"punctuation.bracket.curly", "punctuation.brace"}))
+    return &options.braces;
+  if (is_one_of({"punctuation.bracket.round", "punctuation.bracket",
+                 "punctuation.paren"}))
+    return &options.parentheses;
+
+  if (is_one_of({"keyword.directive", "preproc"}))
+    return &options.pp_directive;
+
+  // Literals.
+  if (is_one_of({"string", "string.literal"}))
+    return &options.string_literal;
+  if (is_one_of({"number", "number.literal", "constant.numeric"}))
+    return &options.scalar_literal;
+
+  if (is_one_of({"identifier", "variable", "function"}))
+    return &options.identifier;
+
+  return nullptr;
+}
+
+// Writes text[start_byte, end_byte) to `s`, using `style` when one is given.
+// If the not-yet-highlighted cursor lies inside the range, the character
+// under it is additionally wrapped in the `selected` style and
+// `highlighted_cursor` is set.
+void TreeSitterHighlighter::HighlightRange(
+    const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte,
+    uint32_t end_byte, const HighlightStyle::ColorStyle *style,
+    std::optional<size_t> cursor_pos, bool &highlighted_cursor,
+    Stream &s) const {
+  // Nothing to emit for empty, inverted or out-of-bounds ranges.
+  const bool is_empty = start_byte >= end_byte;
+  const bool is_out_of_bounds = start_byte >= text.size();
+  if (is_empty || is_out_of_bounds)
+    return;
+
+  // Clamp the end so the slice below never reaches past the text.
+  const uint32_t text_size = static_cast<uint32_t>(text.size());
+  if (end_byte > text_size)
+    end_byte = text_size;
+
+  const llvm::StringRef slice = text.substr(start_byte, end_byte - start_byte);
+
+  // Writes `piece` to `out`, styled if a style was provided.
+  const auto emit = [style](Stream &out, llvm::StringRef piece) {
+    if (style)
+      style->Apply(out, piece);
+    else
+      out << piece;
+  };
+
+  const bool cursor_in_slice = cursor_pos && !highlighted_cursor &&
+                               *cursor_pos >= start_byte &&
+                               *cursor_pos < end_byte;
+  if (!cursor_in_slice) {
+    // Common case: the whole slice gets the same treatment.
+    emit(s, slice);
+    return;
+  }
+
+  highlighted_cursor = true;
+  const size_t cursor_offset = *cursor_pos - start_byte;
+
+  // Text in front of the cursor.
+  if (cursor_offset > 0)
+    emit(s, slice.substr(0, cursor_offset));
+
+  // The character under the cursor: apply the regular style first, then wrap
+  // the result in the `selected` style.
+  if (cursor_offset < slice.size()) {
+    StreamString styled_cursor;
+    emit(styled_cursor, slice.substr(cursor_offset, 1));
+    options.selected.Apply(s, styled_cursor.GetString());
+  }
+
+  // Text behind the cursor.
+  if (cursor_offset + 1 < slice.size())
+    emit(s, slice.substr(cursor_offset + 1));
+}
+
+// Highlights `line` and writes the result to `s`.
+//
+// `previous_lines` is parsed together with `line` so that constructs spanning
+// several lines are recognized, but only the text of `line` is written. If
+// the tree-sitter state is unusable or parsing fails, `line` is written
+// unmodified. `cursor_pos`, when set, is a byte offset into `line`.
+void TreeSitterHighlighter::Highlight(const HighlightStyle &options,
+                                      llvm::StringRef line,
+                                      std::optional<size_t> cursor_pos,
+                                      llvm::StringRef previous_lines,
+                                      Stream &s) const {
+  auto unformatted = [&]() -> void { s << line; };
+
+  TSState &ts_state = GetTSState();
+  if (!ts_state)
+    return unformatted();
+
+  const std::string source = previous_lines.str() + line.str();
+  TSTree *tree =
+      ts_parser_parse_string(ts_state.parser, nullptr, source.c_str(),
+                             static_cast<uint32_t>(source.size()));
+  if (!tree)
+    return unformatted();
+
+  // The tree is owned by the caller of ts_parser_parse_string and has to be
+  // released explicitly. Declared before the cursor guard so the cursor is
+  // deleted first.
+  llvm::scope_exit delete_tree([&] { ts_tree_delete(tree); });
+
+  TSQueryCursor *cursor = ts_query_cursor_new();
+  assert(cursor);
+
+  llvm::scope_exit delete_cursor([&] { ts_query_cursor_delete(cursor); });
+
+  TSNode root_node = ts_tree_root_node(tree);
+  ts_query_cursor_exec(cursor, ts_state.query, root_node);
+
+  // Tree-sitter reports byte offsets into `source`, while everything below is
+  // sliced out of `line`, which starts at this offset within `source`.
+  const uint32_t line_offset = static_cast<uint32_t>(previous_lines.size());
+
+  // Collect all styled captures as byte ranges relative to `line`.
+  std::vector<HLRange> highlights;
+  TSQueryMatch match;
+  uint32_t capture_index;
+  while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) {
+    const TSQueryCapture capture = match.captures[capture_index];
+
+    uint32_t capture_name_len = 0;
+    const char *capture_name = ts_query_capture_name_for_id(
+        ts_state.query, capture.index, &capture_name_len);
+
+    const HighlightStyle::ColorStyle *style = GetStyleForCapture(
+        llvm::StringRef(capture_name, capture_name_len), options);
+    if (!style)
+      continue;
+
+    uint32_t start = ts_node_start_byte(capture.node);
+    const uint32_t end = ts_node_end_byte(capture.node);
+
+    // Skip empty captures and captures that lie entirely in previous lines.
+    if (start >= end || end <= line_offset)
+      continue;
+
+    // A capture that begins on a previous line (e.g. a block comment) is
+    // clipped to the start of the current line.
+    start = std::max(start, line_offset);
+    highlights.push_back({start - line_offset, end - line_offset, style});
+  }
+
+  // Order by start offset, preferring longer matches. The sort is stable so
+  // that captures covering the exact same range keep their query order and
+  // the choice between them is deterministic.
+  std::stable_sort(
+      highlights.begin(), highlights.end(),
+      [](const HLRange &a, const HLRange &b) {
+        if (a.start_byte != b.start_byte)
+          return a.start_byte < b.start_byte;
+        return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte);
+      });
+
+  uint32_t current_pos = 0;
+  bool highlighted_cursor = false;
+
+  for (const HLRange &h : highlights) {
+    // Skip over highlights that start before our current position, which
+    // means they overlap with a range that was already written.
+    if (h.start_byte < current_pos)
+      continue;
+
+    // Output any unhighlighted text before this highlight.
+    if (current_pos < h.start_byte) {
+      HighlightRange(options, line, current_pos, h.start_byte, nullptr,
+                     cursor_pos, highlighted_cursor, s);
+      current_pos = h.start_byte;
+    }
+
+    // Output the highlighted range.
+    HighlightRange(options, line, h.start_byte, h.end_byte, h.style, cursor_pos,
+                   highlighted_cursor, s);
+    current_pos = h.end_byte;
+  }
+
+  // Output any remaining unhighlighted text.
+  if (current_pos < line.size()) {
+    HighlightRange(options, line, current_pos,
+                   static_cast<uint32_t>(line.size()), nullptr, cursor_pos,
+                   highlighted_cursor, s);
+  }
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
new file mode 100644
index 0000000000000..38530400b2b1d
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+
+#include "lldb/Core/Highlighter.h"
+#include "lldb/Utility/Stream.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <optional>
+#include <tree_sitter/api.h>
+
+namespace lldb_private {
+
+/// Base class for syntax highlighters that are backed by tree-sitter.
+///
+/// A concrete highlighter supplies the grammar (GetLanguage) and the highlight
+/// query (GetHighlightQuery). Parsing, running the query and turning the
+/// resulting captures into styled output is implemented here.
+class TreeSitterHighlighter : public Highlighter {
+public:
+  TreeSitterHighlighter() = default;
+  ~TreeSitterHighlighter() override = default;
+
+  /// Highlights a single line of code using tree-sitter parsing.
+  ///
+  /// \param options        Styles to apply to the different kinds of tokens.
+  /// \param line           The line to highlight.
+  /// \param cursor_pos     Optional cursor position within the line; the
+  ///                       character at that position is additionally marked
+  ///                       with the `selected` style.
+  /// \param previous_lines Source text preceding \p line; it is parsed
+  ///                       together with \p line.
+  /// \param s              Stream the highlighted line is written to.
+  void Highlight(const HighlightStyle &options, llvm::StringRef line,
+                 std::optional<size_t> cursor_pos,
+                 llvm::StringRef previous_lines, Stream &s) const override;
+
+protected:
+  /// Returns the tree-sitter language for this highlighter.
+  virtual const TSLanguage *GetLanguage() const = 0;
+
+  /// Returns the tree-sitter highlight query for this language.
+  virtual llvm::StringRef GetHighlightQuery() const = 0;
+
+private:
+  /// Maps a tree-sitter capture name to a HighlightStyle color.
+  ///
+  /// \return The matching style in \p options, or nullptr if the capture name
+  ///         has no associated style.
+  const HighlightStyle::ColorStyle *
+  GetStyleForCapture(llvm::StringRef capture_name,
+                     const HighlightStyle &options) const;
+
+  /// Applies syntax highlighting to a range of text.
+  ///
+  /// Writes the bytes [start_byte, end_byte) of \p text to \p s, using
+  /// \p style if it is non-null. \p highlighted_cursor is set once the cursor
+  /// has been emitted so that it is marked only once per line.
+  void HighlightRange(const HighlightStyle &options, llvm::StringRef text,
+                      uint32_t start_byte, uint32_t end_byte,
+                      const HighlightStyle::ColorStyle *style,
+                      std::optional<size_t> cursor_pos,
+                      bool &highlighted_cursor, Stream &s) const;
+
+  /// A styled byte range.
+  struct HLRange {
+    uint32_t start_byte;
+    uint32_t end_byte;
+    const HighlightStyle::ColorStyle *style;
+  };
+
+  /// Owns the tree-sitter parser and query used by a highlighter. Both are
+  /// released by the destructor, hence the type is not copyable.
+  struct TSState {
+    TSState() = default;
+    TSState &operator=(const TSState &) = delete;
+    TSState(const TSState &) = delete;
+    ~TSState();
+
+    /// True if both the parser and the query are available.
+    explicit operator bool() const;
+    TSParser *parser = nullptr;
+    TSQuery *query = nullptr;
+  };
+
+  /// Lazily creates a tree-sitter state (TSState).
+  TSState &GetTSState() const;
+
+  /// Cache for GetTSState(); mutable because the state is created on demand
+  /// from const member functions.
+  mutable std::optional<TSState> m_ts_state;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
>From d766461d72346dcc9f2a8ad0e32186242286bd51 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 12 Feb 2026 16:45:45 -0800
Subject: [PATCH 2/2] Fix bogus component
---
lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt | 1 -
1 file changed, 1 deletion(-)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index 99c03da999720..f85595d3b574c 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -3,7 +3,6 @@ add_lldb_library(lldbTreeSitter
LINK_COMPONENTS
Support
- ADT
LINK_LIBS
lldbUtility
${TreeSitter_LIBRARY}
More information about the lldb-commits
mailing list