[Lldb-commits] [lldb] [lldb] Add tree-sitter based syntax highlighting (PR #181279)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Fri Feb 13 10:16:12 PST 2026
================
@@ -0,0 +1,231 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TreeSitterHighlighter.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/StreamString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace lldb_private;
+
+/// Releases the tree-sitter handles held by this state. Null handles are
+/// skipped, and the query is deleted before the parser.
+TreeSitterHighlighter::TSState::~TSState() {
+  if (query != nullptr)
+    ts_query_delete(query);
+
+  if (parser != nullptr)
+    ts_parser_delete(parser);
+}
+
+/// A state converts to true only when both the parser and the query handles
+/// are set.
+TreeSitterHighlighter::TSState::operator bool() const {
+  return !(parser == nullptr || query == nullptr);
+}
+
+/// Returns the stored tree-sitter state, building it on the first call.
+///
+/// The state object is stored before any setup step runs, so later calls
+/// return that same object, including one left unusable by a failed step.
+/// Each failure is logged to the Source log channel. Callers test the result
+/// with TSState::operator bool.
+TreeSitterHighlighter::TSState &TreeSitterHighlighter::GetTSState() const {
+  if (m_ts_state)
+    return *m_ts_state;
+
+  Log *log = GetLog(LLDBLog::Source);
+
+  m_ts_state.emplace();
+  TSState &state = *m_ts_state;
+
+  // Step 1: the parser itself.
+  state.parser = ts_parser_new();
+  if (state.parser == nullptr) {
+    LLDB_LOG(log, "Creating tree-sitter parser failed for {0}", GetName());
+    return *m_ts_state;
+  }
+
+  // Step 2: attach the grammar to the parser.
+  const TSLanguage *lang = GetLanguage();
+  const bool language_ok = lang && ts_parser_set_language(state.parser, lang);
+  if (!language_ok) {
+    LLDB_LOG(log, "Creating tree-sitter language failed for {0}", GetName());
+    return *m_ts_state;
+  }
+
+  // Step 3: compile the highlight query against that grammar.
+  llvm::StringRef query_text = GetHighlightQuery();
+  uint32_t err_offset = 0;
+  TSQueryError err_kind = TSQueryErrorNone;
+  state.query =
+      ts_query_new(lang, query_text.data(),
+                   static_cast<uint32_t>(query_text.size()), &err_offset,
+                   &err_kind);
+
+  const bool query_ok = state.query && err_kind == TSQueryErrorNone;
+  if (!query_ok) {
+    LLDB_LOG(log,
+             "Creating tree-sitter query failed for {0} with error {1}: {2}",
+             GetName(), err_kind, query_text.substr(err_offset, 64));
+    // An error may still come with a non-null query. Replace the whole state
+    // so that it (1) does not look valid and (2) gives up the parser.
+    m_ts_state.emplace();
+  }
+
+  return *m_ts_state;
+}
+
+/// Maps a highlight-query capture name to the matching color style in
+/// \p options.
+///
+/// \return A pointer into \p options, or nullptr when the capture name is not
+/// one of the names listed below.
+const HighlightStyle::ColorStyle *
+TreeSitterHighlighter::GetStyleForCapture(llvm::StringRef capture_name,
+                                          const HighlightStyle &options) const {
+  using StylePtr = const HighlightStyle::ColorStyle *;
+
+  return llvm::StringSwitch<StylePtr>(capture_name)
+      // Words and literals.
+      .Cases({"keyword", "type"}, &options.keyword)
+      .Cases({"identifier", "variable", "function"}, &options.identifier)
+      .Cases({"string", "string.literal"}, &options.string_literal)
+      .Cases({"number", "number.literal", "constant.numeric"},
+             &options.scalar_literal)
+      .Cases({"keyword.directive", "preproc"}, &options.pp_directive)
+      .Case("comment", &options.comment)
+      .Case("operator", &options.operators)
+      // Delimiters.
+      .Case("punctuation.delimiter.comma", &options.comma)
+      .Case("punctuation.delimiter.colon", &options.colon)
+      .Case("punctuation.delimiter.semicolon", &options.semicolons)
+      // Brackets.
+      .Case("punctuation.bracket.square", &options.square_brackets)
+      .Cases({"punctuation.bracket.curly", "punctuation.brace"},
+             &options.braces)
+      .Cases({"punctuation.bracket.round", "punctuation.bracket",
+              "punctuation.paren"},
+             &options.parentheses)
+      .Default(nullptr);
+}
+
+/// Writes the bytes [start_byte, end_byte) of \p text to \p s.
+///
+/// The range is clamped to the size of \p text. When \p style is non-null it
+/// is applied to the output; otherwise the text is written as is. If
+/// \p cursor_pos falls inside the range and \p highlighted_cursor is still
+/// false, the single byte at the cursor is additionally wrapped in
+/// options.selected and \p highlighted_cursor is set to true.
+void TreeSitterHighlighter::HighlightRange(
+    const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte,
+    uint32_t end_byte, const HighlightStyle::ColorStyle *style,
+    std::optional<size_t> cursor_pos, bool &highlighted_cursor,
+    Stream &s) const {
+  // Empty ranges and ranges starting past the text produce no output.
+  if (start_byte >= end_byte || start_byte >= text.size())
+    return;
+
+  const uint32_t limit =
+      std::min(end_byte, static_cast<uint32_t>(text.size()));
+  const llvm::StringRef slice = text.substr(start_byte, limit - start_byte);
+
+  // Writes a piece to the given stream, styled when a style was supplied.
+  auto emit = [style](Stream &out, llvm::StringRef piece) {
+    if (style)
+      style->Apply(out, piece);
+    else
+      out << piece;
+  };
+
+  const bool cursor_here = cursor_pos && !highlighted_cursor &&
+                           *cursor_pos >= start_byte && *cursor_pos < limit;
+  if (!cursor_here) {
+    // Common case: the whole range is written in one piece.
+    emit(s, slice);
+    return;
+  }
+
+  highlighted_cursor = true;
+
+  // Offset of the cursor relative to the start of the slice.
+  const size_t offset = *cursor_pos - start_byte;
+
+  // Part before the cursor.
+  if (offset > 0)
+    emit(s, slice.substr(0, offset));
+
+  // The cursor byte: styled into a scratch stream first, then wrapped in the
+  // selection style.
+  if (offset < slice.size()) {
+    StreamString styled_char;
+    emit(styled_char, slice.substr(offset, 1));
+    options.selected.Apply(s, styled_char.GetString());
+  }
+
+  // Part after the cursor.
+  if (offset + 1 < slice.size())
+    emit(s, slice.substr(offset + 1));
+}
+
+void TreeSitterHighlighter::Highlight(const HighlightStyle &options,
+ llvm::StringRef line,
+ std::optional<size_t> cursor_pos,
+ llvm::StringRef previous_lines,
+ Stream &s) const {
+ auto unformatted = [&]() -> void { s << line; };
+
+ TSState &ts_state = GetTSState();
+ if (!ts_state)
+ return unformatted();
+
+ std::string source = previous_lines.str() + line.str();
+ TSTree *tree =
+ ts_parser_parse_string(ts_state.parser, nullptr, source.c_str(),
+ static_cast<uint32_t>(source.size()));
+ if (!tree)
+ return unformatted();
+
+ TSQueryCursor *cursor = ts_query_cursor_new();
+ assert(cursor);
+
+ llvm::scope_exit delete_cusor([&] { ts_query_cursor_delete(cursor); });
+
+ TSNode root_node = ts_tree_root_node(tree);
+ ts_query_cursor_exec(cursor, ts_state.query, root_node);
+
+ // Collect all matches and their byte ranges.
+ std::vector<HLRange> highlights;
+ TSQueryMatch match;
+ uint32_t capture_index;
+ while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) {
+ TSQueryCapture capture = match.captures[capture_index];
+
+ uint32_t capture_name_len = 0;
+ const char *capture_name = ts_query_capture_name_for_id(
+ ts_state.query, capture.index, &capture_name_len);
+
+ const HighlightStyle::ColorStyle *style = GetStyleForCapture(
+ llvm::StringRef(capture_name, capture_name_len), options);
+
+ TSNode node = capture.node;
+ uint32_t start = ts_node_start_byte(node);
+ uint32_t end = ts_node_end_byte(node);
+
+ if (style && start < end)
+ highlights.push_back({start, end, style});
+ }
+
+ std::sort(highlights.begin(), highlights.end(),
+ [](const HLRange &a, const HLRange &b) {
+ if (a.start_byte != b.start_byte)
+ return a.start_byte < b.start_byte;
+ // Prefer longer matches.
+ return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte);
----------------
JDevlieghere wrote:
That's a good point. I was trying to keep things simple by going with the longest match, but I agree with your reasoning, so I'll invert the condition.
https://github.com/llvm/llvm-project/pull/181279
More information about the lldb-commits
mailing list