[clang] [clang][Diagnostics] Highlight code snippets (PR #66514)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 19 00:28:49 PST 2024
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/66514 at github.com>
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514
>From 41bb52dc591ce1c154a4272abb738c80a7a57b90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 15 Sep 2023 15:51:39 +0200
Subject: [PATCH 1/3] [clang][Diagnostics] Highlight code snippets
Add some primitive syntax highlighting to our code snippet output.
---
clang/include/clang/Frontend/TextDiagnostic.h | 18 +-
clang/include/clang/Lex/Preprocessor.h | 5 +
clang/lib/Frontend/TextDiagnostic.cpp | 197 +++++++++++++++++-
clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +-
clang/lib/Lex/Preprocessor.cpp | 24 +++
5 files changed, 232 insertions(+), 14 deletions(-)
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 7eb0ab0cdc9bca..a2fe8ae995423b 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -16,6 +16,7 @@
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
#include "clang/Frontend/DiagnosticRenderer.h"
+#include "llvm/Support/raw_ostream.h"
namespace clang {
@@ -33,14 +34,22 @@ namespace clang {
/// printing coming out of libclang.
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
+ const Preprocessor *PP;
public:
- TextDiagnostic(raw_ostream &OS,
- const LangOptions &LangOpts,
- DiagnosticOptions *DiagOpts);
+ TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+ DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr);
~TextDiagnostic() override;
+ struct StyleRange {
+ unsigned Start;
+ unsigned End;
+ enum llvm::raw_ostream::Colors Color;
+ StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
+ : Start(S), End(E), Color(C){};
+ };
+
/// Print the diagonstic level to a raw_ostream.
///
/// This is a static helper that handles colorizing the level and formatting
@@ -104,7 +113,8 @@ class TextDiagnostic : public DiagnosticRenderer {
ArrayRef<FixItHint> Hints);
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
- unsigned LineNo);
+ unsigned LineNo, unsigned DisplayLineNo,
+ ArrayRef<StyleRange> Styles);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4ec21a8b6be2c8..d89e2be1bf5ff5 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -284,6 +284,8 @@ class Preprocessor {
/// The kind of translation unit we are processing.
const TranslationUnitKind TUKind;
+ const char *getCheckPoint(FileID FID, const char *Start) const;
+
private:
/// The code-completion handler.
CodeCompletionHandler *CodeComplete = nullptr;
@@ -311,6 +313,9 @@ class Preprocessor {
/// The import path for named module that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
+ llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
+ unsigned CheckPointCounter = 0;
+
/// Whether the import is an `@import` or a standard c++ modules import.
bool IsAtImport = false;
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 779dead5d058d1..691809ceded87b 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -12,6 +12,7 @@
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
@@ -41,6 +42,14 @@ static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
static const enum raw_ostream::Colors savedColor =
raw_ostream::SAVEDCOLOR;
+// Magenta is taken for 'warning'. Red is already 'error' and cyan
+// is already taken for 'note'. Green is already used to underline
+// source ranges. White and black are bad because of the usual
+// terminal backgrounds. That leaves us with only TWO options.
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
+
/// Add highlights to differences in template strings.
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
bool &Normal, bool Bold) {
@@ -644,10 +653,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
return Wrapped;
}
-TextDiagnostic::TextDiagnostic(raw_ostream &OS,
- const LangOptions &LangOpts,
- DiagnosticOptions *DiagOpts)
- : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
+TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+ DiagnosticOptions *DiagOpts,
+ const Preprocessor *PP)
+ : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
TextDiagnostic::~TextDiagnostic() {}
@@ -1112,6 +1121,148 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
return LineRanges;
}
+/// Creates syntax highlighting information in form of StyleRanges.
+///
+/// The returned unique ptr has always exactly size
+/// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there
+/// corresponds to syntax highlighting information in one line. In each line,
+/// the StyleRanges are non-overlapping and sorted from start to end of the
+/// line.
+static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
+highlightLines(StringRef FileData, unsigned StartLineNumber,
+ unsigned EndLineNumber, const Preprocessor *PP,
+ const LangOptions &LangOpts, bool ShowColors, FileID FID,
+ const SourceManager &SM) {
+ assert(StartLineNumber <= EndLineNumber);
+ auto SnippetRanges =
+ std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>(
+ EndLineNumber - StartLineNumber + 1);
+
+ if (!PP || !ShowColors)
+ return SnippetRanges;
+
+ // Might cause emission of another diagnostic.
+ if (PP->getIdentifierTable().getExternalIdentifierLookup())
+ return SnippetRanges;
+
+ auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData);
+ Lexer L{FID, *Buff, SM, LangOpts};
+ L.SetKeepWhitespaceMode(true);
+
+ const char *LineStart =
+ FileData.data() +
+ SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second;
+ if (const char *CheckPoint = PP->getCheckPoint(FID, LineStart)) {
+ assert(CheckPoint >= Buff->getBufferStart() &&
+ CheckPoint <= Buff->getBufferEnd());
+ assert(CheckPoint <= LineStart);
+ size_t Offset = CheckPoint - Buff->getBufferStart();
+ L.seek(Offset, /*IsAtStartOfLine=*/false);
+ }
+
+ // Classify the given token and append it to the given vector.
+ auto appendStyle =
+ [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec,
+ const Token &T, unsigned Start, unsigned Length) -> void {
+ if (T.is(tok::raw_identifier)) {
+ StringRef RawIdent = T.getRawIdentifier();
+ // Special case true/false/nullptr literals, since they will otherwise be
+ // treated as keywords.
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
+ } else {
+ const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+ assert(II);
+ if (II->isKeyword(LangOpts))
+ Vec.emplace_back(Start, Start + Length, KeywordColor);
+ }
+ } else if (tok::isLiteral(T.getKind())) {
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
+ } else {
+ assert(T.is(tok::comment));
+ Vec.emplace_back(Start, Start + Length, CommentColor);
+ }
+ };
+
+ bool Stop = false;
+ while (!Stop) {
+ Token T;
+ Stop = L.LexFromRawLexer(T);
+ if (T.is(tok::unknown))
+ continue;
+
+ // We are only interested in identifiers, literals and comments.
+ if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
+ !tok::isLiteral(T.getKind()))
+ continue;
+
+ bool Invalid = false;
+ unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
+ if (Invalid || TokenEndLine < StartLineNumber)
+ continue;
+
+ assert(TokenEndLine >= StartLineNumber);
+
+ unsigned TokenStartLine =
+ SM.getSpellingLineNumber(T.getLocation(), &Invalid);
+ if (Invalid)
+ continue;
+ // If this happens, we're done.
+ if (TokenStartLine > EndLineNumber)
+ break;
+
+ unsigned StartCol =
+ SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ // Simple tokens.
+ if (TokenStartLine == TokenEndLine) {
+ SmallVector<TextDiagnostic::StyleRange> &LineRanges =
+ SnippetRanges[TokenStartLine - StartLineNumber];
+ appendStyle(LineRanges, T, StartCol, T.getLength());
+ continue;
+ }
+ assert((TokenEndLine - TokenStartLine) >= 1);
+
+ // For tokens that span multiple lines (think multiline comments), we
+ // divide them into multiple StyleRanges.
+ unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
+
+ unsigned L = TokenStartLine;
+ unsigned LineLength = 0;
+ for (unsigned I = 0; I <= Spelling.size(); ++I) {
+ // This line is done.
+ if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
+ SmallVector<TextDiagnostic::StyleRange> &LineRanges =
+ SnippetRanges[L - StartLineNumber];
+
+ if (L >= StartLineNumber) {
+ if (L == TokenStartLine) // First line
+ appendStyle(LineRanges, T, StartCol, LineLength);
+ else if (L == TokenEndLine) // Last line
+ appendStyle(LineRanges, T, 0, EndCol);
+ else
+ appendStyle(LineRanges, T, 0, LineLength);
+ }
+
+ ++L;
+ if (L > EndLineNumber)
+ break;
+ LineLength = 0;
+ continue;
+ }
+ ++LineLength;
+ }
+ }
+
+ return SnippetRanges;
+}
+
/// Emit a code snippet and caret line.
///
/// This routine emits a single line's code snippet and caret line..
@@ -1181,6 +1332,12 @@ void TextDiagnostic::emitSnippetAndCaret(
OS.indent(MaxLineNoDisplayWidth + 2) << "| ";
};
+ // Prepare source highlighting information for the lines we're about to
+ // emit, starting from the first line.
+ std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles =
+ highlightLines(BufStart, Lines.first, Lines.second, PP, LangOpts,
+ DiagOpts->ShowColors, FID, SM);
+
SmallVector<LineRange> LineRanges =
prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);
@@ -1247,7 +1404,8 @@ void TextDiagnostic::emitSnippetAndCaret(
}
// Emit what we have computed.
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
+ emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo,
+ SourceStyles[LineNo - Lines.first]);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1277,16 +1435,18 @@ void TextDiagnostic::emitSnippetAndCaret(
void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth,
- unsigned LineNo) {
+ unsigned LineNo, unsigned DisplayLineNo,
+ ArrayRef<StyleRange> Styles) {
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
- unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
+ unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
- << LineNo << " | ";
+ << DisplayLineNo << " | ";
}
// Print the source line one character at a time.
bool PrintReversed = false;
+ std::optional<llvm::raw_ostream::Colors> CurrentColor;
size_t I = 0;
while (I < SourceLine.size()) {
auto [Str, WasPrintable] =
@@ -1298,10 +1458,29 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
PrintReversed = !PrintReversed;
if (PrintReversed)
OS.reverseColor();
- else
+ else {
OS.resetColor();
+ CurrentColor = std::nullopt;
+ }
+ }
+
+ // Apply syntax highlighting information.
+ const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) {
+ return (R.Start < I && R.End >= I);
+ });
+
+ if (CharStyle != Styles.end()) {
+ if (!CurrentColor ||
+ (CurrentColor && *CurrentColor != CharStyle->Color)) {
+ OS.changeColor(CharStyle->Color, false);
+ CurrentColor = CharStyle->Color;
+ }
+ } else if (CurrentColor) {
+ OS.resetColor();
+ CurrentColor = std::nullopt;
}
}
+
OS << Str;
}
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 0ff5376098ffe8..b2fb762537573e 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
const Preprocessor *PP) {
// Build the TextDiagnostic utility.
- TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts));
+ TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP));
}
void TextDiagnosticPrinter::EndSourceFile() {
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 64f54c6fc6382f..f756b62cc87358 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -72,6 +72,9 @@
using namespace clang;
+/// Minimum distance between two check points, in tokens.
+static constexpr unsigned CheckPointStepSize = 1024;
+
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
@@ -954,6 +957,11 @@ void Preprocessor::Lex(Token &Result) {
}
}
+ if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
+ CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
+ CheckPointCounter = 0;
+ }
+
LastTokenWasAt = Result.is(tok::at);
--LexLevel;
@@ -1553,3 +1561,19 @@ void Preprocessor::createPreprocessingRecord() {
Record = new PreprocessingRecord(getSourceManager());
addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
}
+
+const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
+ if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
+ const SmallVector<const char *> &FileCheckPoints = It->second;
+ const char *Last = nullptr;
+ // FIXME: Do better than a linear search.
+ for (const char *P : FileCheckPoints) {
+ if (P > Start)
+ break;
+ Last = P;
+ }
+ return Last;
+ }
+
+ return nullptr;
+}
>From 7afb8f8f6291625b911fb58a8d17d12399c9bafd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 18 Jan 2024 15:10:44 +0100
Subject: [PATCH 2/3] Add test case for disabled coloring when piping
---
clang/test/Frontend/diagnostic-pipe.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
create mode 100644 clang/test/Frontend/diagnostic-pipe.c
diff --git a/clang/test/Frontend/diagnostic-pipe.c b/clang/test/Frontend/diagnostic-pipe.c
new file mode 100644
index 00000000000000..ab97e66cc198fb
--- /dev/null
+++ b/clang/test/Frontend/diagnostic-pipe.c
@@ -0,0 +1,16 @@
+
+_Static_assert(0, "");
+
+/// Test that piping the output into another process disables syntax
+/// highlighting of code snippets.
+
+// RUN: not %clang_cc1 %s -o /dev/null 2>&1 | FileCheck -strict-whitespace %s
+// CHECK: error: static assertion failed:
+// CHECK-NEXT: {{^}} 2 | _Static_assert(0, "");{{$}}
+
+
+
+// RUN: not %clang_cc1 %s -o /dev/null -color-diagnostics > %t 2>&1
+// RUN: FileCheck --input-file=%t %s -check-prefix=COLOR
+// COLOR: error: static assertion failed:
+// COLOR-NEXT: {{^}} 2 | _Static_assert(0, "");{{$}}
>From 71b9d2900c213fc5f6850946cb59002faa23ff8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 19 Jan 2024 09:26:56 +0100
Subject: [PATCH 3/3] Add more keywords-that-are-literals
---
clang/lib/Frontend/TextDiagnostic.cpp | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 691809ceded87b..ed02a47435e9f4 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1166,9 +1166,23 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,
const Token &T, unsigned Start, unsigned Length) -> void {
if (T.is(tok::raw_identifier)) {
StringRef RawIdent = T.getRawIdentifier();
- // Special case true/false/nullptr literals, since they will otherwise be
- // treated as keywords.
- if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
+ // Special case true/false/nullptr/... literals, since they will otherwise
+ // be treated as keywords.
+ // FIXME: It would be good to have a programmatic way of getting this
+ // list.
+ if (llvm::StringSwitch<bool>(RawIdent)
+ .Case("true", true)
+ .Case("false", true)
+ .Case("nullptr", true)
+ .Case("__func__", true)
+ .Case("__objc_yes__", true)
+ .Case("__objc_no__", true)
+ .Case("__null", true)
+ .Case("__FUNCDNAME__", true)
+ .Case("__FUNCSIG__", true)
+ .Case("__FUNCTION__", true)
+ .Case("__FUNCSIG__", true)
+ .Default(false)) {
Vec.emplace_back(Start, Start + Length, LiteralColor);
} else {
const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
More information about the cfe-commits
mailing list