[clang] [clang][Diagnostics] Highlight code snippets (PR #66514)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 15 11:46:34 PDT 2023
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514
>From 85e868765f37e09b922ff00869f1f1a7ff1ebd9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 15 Sep 2023 15:51:39 +0200
Subject: [PATCH] [clang][Diagnostics] Highlight code snippets
Add some primitive syntax highlighting to our code snippet output.
---
.../clang/Frontend/CodeSnippetHighlighter.h | 46 +++++++
clang/include/clang/Frontend/TextDiagnostic.h | 4 +-
clang/lib/Frontend/CMakeLists.txt | 1 +
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++
clang/lib/Frontend/TextDiagnostic.cpp | 30 ++++-
5 files changed, 198 insertions(+), 3 deletions(-)
create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h
create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
new file mode 100644
index 000000000000000..776954b59e2e1a8
--- /dev/null
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -0,0 +1,46 @@
+//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace clang {
+
+struct StyleRange { // One colored span within a single source line.
+ unsigned Start; // Span start; highlightLine() stores column number - 1.
+ unsigned End; // highlightLine() stores Start + token length here.
+ const enum llvm::raw_ostream::Colors c; // Color used to print the span.
+};
+
+class CodeSnippetHighlighter final { // Computes per-line style ranges.
+public:
+ CodeSnippetHighlighter() = default;
+
+ /// Produce StyleRanges for the given line, lexed using \p LangOpts.
+ /// The returned vector contains non-overlapping style ranges. They are sorted
+ /// from beginning of the line to the end. Unstyled text gets no range.
+ std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+ const LangOptions &LangOpts);
+
+private:
+ bool Initialized = false; // Set once ensureTokenData() has filled the sets.
+ /// Fills Keywords and Literals on the first call; later calls return early.
+ void ensureTokenData();
+
+ // NOTE(review): ensureTokenData() inserts far more than 12 keywords, so the
+ llvm::SmallSet<StringRef, 12> Keywords; // inline size looks too low; confirm.
+ llvm::SmallSet<StringRef, 12> Literals; // Identifiers styled as literals.
+};
+
+} // namespace clang
+
+#endif
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 7eb0ab0cdc9bca8..409dc4799307eeb 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
+#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Frontend/DiagnosticRenderer.h"
namespace clang {
@@ -33,6 +34,7 @@ namespace clang {
/// printing coming out of libclang.
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
+ CodeSnippetHighlighter SnippetHighlighter;
public:
TextDiagnostic(raw_ostream &OS,
@@ -104,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer {
ArrayRef<FixItHint> Hints);
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
- unsigned LineNo);
+ unsigned LineNo, const SourceManager &SM);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index 1e5f0a859dfd568..f3547f771593093 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -42,6 +42,7 @@ add_clang_library(clangFrontend
TextDiagnosticPrinter.cpp
VerifyDiagnosticConsumer.cpp
InterfaceStubFunctionsConsumer.cpp
+ CodeSnippetHighlighter.cpp
DEPENDS
ClangDriverOptions
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
new file mode 100644
index 000000000000000..829a533ad2692e5
--- /dev/null
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -0,0 +1,120 @@
+
+#include "clang/Frontend/CodeSnippetHighlighter.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+
+void CodeSnippetHighlighter::ensureTokenData() { // Lazily fills both sets.
+ if (Initialized) // Already populated by an earlier call.
+ return;
+
+ // Identifier spellings that highlightLine() colors as keywords.
+ // These are best-effort, as is everything we do wrt. highlighting.
+ Keywords.insert("_Static_assert");
+ Keywords.insert("auto");
+ Keywords.insert("concept");
+ Keywords.insert("const");
+ Keywords.insert("consteval");
+ Keywords.insert("constexpr");
+ Keywords.insert("delete");
+ Keywords.insert("do");
+ Keywords.insert("else");
+ Keywords.insert("final");
+ Keywords.insert("for");
+ Keywords.insert("if");
+ Keywords.insert("mutable");
+ Keywords.insert("namespace");
+ Keywords.insert("new");
+ Keywords.insert("private");
+ Keywords.insert("public");
+ Keywords.insert("requires");
+ Keywords.insert("return");
+ Keywords.insert("static");
+ Keywords.insert("static_assert");
+ Keywords.insert("using");
+ Keywords.insert("void");
+ Keywords.insert("volatile");
+ Keywords.insert("while");
+
+ // Builtin types we highlight; they share the Keywords set (and its color).
+ Keywords.insert("void"); // NOTE(review): already inserted above.
+ Keywords.insert("char");
+ Keywords.insert("short");
+ Keywords.insert("int");
+ Keywords.insert("unsigned");
+ Keywords.insert("long");
+ Keywords.insert("float");
+ Keywords.insert("double");
+
+ Literals.insert("true"); // Identifier spellings colored as literals.
+ Literals.insert("false");
+ Literals.insert("nullptr");
+
+ Initialized = true; // Make every later call return early.
+}
+
+static SourceManager createTempSourceManager() { // Throwaway SourceManager.
+ FileSystemOptions FileOpts; // Default-constructed options.
+ FileManager FileMgr(FileOpts); // Local; destroyed when this function returns.
+ llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
+ llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
+ DiagnosticsEngine diags(DiagIDs, DiagOpts); // Local as well.
+ return SourceManager(diags, FileMgr); // NOTE(review): if SourceManager keeps references to diags/FileMgr they dangle after return; confirm.
+}
+
+static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
+ const LangOptions &LangOpts) { // Lexer over B, using LangOpts.
+ return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); // B gets a new FileID in FakeSM.
+}
+
+std::vector<StyleRange>
+CodeSnippetHighlighter::highlightLine(StringRef SourceLine,
+ const LangOptions &LangOpts) { // Lex SourceLine, color its tokens.
+ ensureTokenData(); // Keywords/Literals must be filled before lookups below.
+
+ constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; // NOTE(review): may be unreadable on dark terminals; confirm.
+ constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
+ constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
+
+ const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
+ SourceManager FakeSM = createTempSourceManager(); // Temporary, line-only.
+ Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
+ L.SetKeepWhitespaceMode(true);
+
+ std::vector<StyleRange> Styles; // Appended in lexing order.
+ bool Stop = false;
+ while (!Stop) { // The token lexed when Stop becomes true is still handled.
+ Token tok;
+ Stop = L.LexFromRawLexer(tok); // Presumably true once the end is reached.
+ if (tok.is(tok::unknown)) // Unknown tokens get no style.
+ continue;
+
+ bool Invalid; // NOTE(review): uninitialized; relies on the call below.
+ unsigned Start =
+ FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1;
+ if (Invalid) // No usable column for this token; leave it unstyled.
+ continue;
+
+ if (tok.is(tok::raw_identifier)) {
+ // Almost everything we lex is an identifier, since we use a raw lexer.
+ // Some should be highlighted as literals, others as keywords.
+ if (Keywords.contains(tok.getRawIdentifier()))
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), KeywordColor});
+ else if (Literals.contains(tok.getRawIdentifier()))
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), LiteralColor});
+ } else if (tok::isLiteral(tok.getKind())) { // Literal token kinds.
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), LiteralColor});
+ } else if (tok.is(tok::comment)) { // Comment tokens.
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), CommentColor});
+ }
+ }
+
+ return Styles; // Tokens matching none of the cases contribute no range.
+}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index eaa6e8d29a1dece..49b3e4de56de1d4 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -11,6 +11,7 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -1248,7 +1249,7 @@ void TextDiagnostic::emitSnippetAndCaret(
}
// Emit what we have computed.
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
+ emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo, SM);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1278,7 +1279,10 @@ void TextDiagnostic::emitSnippetAndCaret(
void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth,
- unsigned LineNo) {
+ unsigned LineNo, const SourceManager &SM) {
+ std::vector<StyleRange> Styles =
+ SnippetHighlighter.highlightLine(SourceLine, LangOpts);
+
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
@@ -1288,11 +1292,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
// Print the source line one character at a time.
bool PrintReversed = false;
+ bool HighlightingEnabled = DiagOpts->ShowColors;
size_t I = 0;
while (I < SourceLine.size()) {
auto [Str, WasPrintable] =
printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop);
+ // Just stop highlighting anything for this line if we found a non-printable
+ // character.
+ if (!WasPrintable)
+ HighlightingEnabled = false;
+
+ // FIXME: I hope we can do this in some nicer way.
+ if (HighlightingEnabled) {
+ std::optional<enum raw_ostream::Colors> H;
+ for (auto &P : Styles) {
+ if (P.Start < I && P.End >= I) {
+ H = P.c;
+ break;
+ }
+ }
+
+ if (H) {
+ OS.changeColor(*H, false);
+ } else
+ OS.resetColor();
+ }
+
// Toggle inverted colors on or off for this character.
if (DiagOpts->ShowColors) {
if (WasPrintable == PrintReversed) {
More information about the cfe-commits
mailing list