[clang] [clang][Diagnostics] Highlight code snippets (PR #66514)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 13 00:34:28 PST 2023
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/66514 at github.com>
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514
>From 244bd962de82f3a7f65054086546170a88bdac6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 15 Sep 2023 15:51:39 +0200
Subject: [PATCH 01/28] [clang][Diagnostics] Highlight code snippets
Add some primitive syntax highlighting to our code snippet output.
---
.../clang/Frontend/CodeSnippetHighlighter.h | 46 +++++++
clang/include/clang/Frontend/TextDiagnostic.h | 2 +
clang/lib/Frontend/CMakeLists.txt | 1 +
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++
clang/lib/Frontend/TextDiagnostic.cpp | 26 ++++
5 files changed, 195 insertions(+)
create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h
create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
new file mode 100644
index 000000000000000..776954b59e2e1a8
--- /dev/null
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -0,0 +1,46 @@
+//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace clang {
+
+struct StyleRange {
+ unsigned Start;
+ unsigned End;
+ const enum llvm::raw_ostream::Colors c;
+};
+
+class CodeSnippetHighlighter final {
+public:
+ CodeSnippetHighlighter() = default;
+
+ /// Produce StyleRanges for the given line.
+ /// The returned vector contains non-overlapping style ranges. They are sorted
+ /// from beginning of the line to the end.
+ std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+ const LangOptions &LangOpts);
+
+private:
+ bool Initialized = false;
+ /// Fills Keywords and Literals.
+ void ensureTokenData();
+
+ llvm::SmallSet<StringRef, 12> Keywords;
+ llvm::SmallSet<StringRef, 12> Literals;
+};
+
+} // namespace clang
+
+#endif
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 7eb0ab0cdc9bca8..59fd4d4f9408d48 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
+#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Frontend/DiagnosticRenderer.h"
namespace clang {
@@ -33,6 +34,7 @@ namespace clang {
/// printing coming out of libclang.
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
+ CodeSnippetHighlighter SnippetHighlighter;
public:
TextDiagnostic(raw_ostream &OS,
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index 1e5f0a859dfd568..f3547f771593093 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -42,6 +42,7 @@ add_clang_library(clangFrontend
TextDiagnosticPrinter.cpp
VerifyDiagnosticConsumer.cpp
InterfaceStubFunctionsConsumer.cpp
+ CodeSnippetHighlighter.cpp
DEPENDS
ClangDriverOptions
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
new file mode 100644
index 000000000000000..829a533ad2692e5
--- /dev/null
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -0,0 +1,120 @@
+
+#include "clang/Frontend/CodeSnippetHighlighter.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+
+void CodeSnippetHighlighter::ensureTokenData() {
+ if (Initialized)
+ return;
+
+ // List of keywords, literals and types we want to highlight.
+ // These are best-effort, as is everything we do wrt. highlighting.
+ Keywords.insert("_Static_assert");
+ Keywords.insert("auto");
+ Keywords.insert("concept");
+ Keywords.insert("const");
+ Keywords.insert("consteval");
+ Keywords.insert("constexpr");
+ Keywords.insert("delete");
+ Keywords.insert("do");
+ Keywords.insert("else");
+ Keywords.insert("final");
+ Keywords.insert("for");
+ Keywords.insert("if");
+ Keywords.insert("mutable");
+ Keywords.insert("namespace");
+ Keywords.insert("new");
+ Keywords.insert("private");
+ Keywords.insert("public");
+ Keywords.insert("requires");
+ Keywords.insert("return");
+ Keywords.insert("static");
+ Keywords.insert("static_assert");
+ Keywords.insert("using");
+ Keywords.insert("void");
+ Keywords.insert("volatile");
+ Keywords.insert("while");
+
+ // Builtin types we highlight
+ Keywords.insert("void");
+ Keywords.insert("char");
+ Keywords.insert("short");
+ Keywords.insert("int");
+ Keywords.insert("unsigned");
+ Keywords.insert("long");
+ Keywords.insert("float");
+ Keywords.insert("double");
+
+ Literals.insert("true");
+ Literals.insert("false");
+ Literals.insert("nullptr");
+
+ Initialized = true;
+}
+
+static SourceManager createTempSourceManager() {
+ FileSystemOptions FileOpts;
+ FileManager FileMgr(FileOpts);
+ llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
+ llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
+ DiagnosticsEngine diags(DiagIDs, DiagOpts);
+ return SourceManager(diags, FileMgr);
+}
+
+static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
+ const LangOptions &LangOpts) {
+ return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
+}
+
+std::vector<StyleRange>
+CodeSnippetHighlighter::highlightLine(StringRef SourceLine,
+ const LangOptions &LangOpts) {
+ ensureTokenData();
+
+ constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
+ constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
+ constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
+
+ const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
+ SourceManager FakeSM = createTempSourceManager();
+ Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
+ L.SetKeepWhitespaceMode(true);
+
+ std::vector<StyleRange> Styles;
+ bool Stop = false;
+ while (!Stop) {
+ Token tok;
+ Stop = L.LexFromRawLexer(tok);
+ if (tok.is(tok::unknown))
+ continue;
+
+ bool Invalid;
+ unsigned Start =
+ FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ if (tok.is(tok::raw_identifier)) {
+ // Almost everything we lex is an identifier, since we use a raw lexer.
+ // Some should be highlightes as literals, others as keywords.
+ if (Keywords.contains(tok.getRawIdentifier()))
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), KeywordColor});
+ else if (Literals.contains(tok.getRawIdentifier()))
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), LiteralColor});
+ } else if (tok::isLiteral(tok.getKind())) {
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), LiteralColor});
+ } else if (tok.is(tok::comment)) {
+ Styles.push_back(
+ StyleRange{Start, Start + tok.getLength(), CommentColor});
+ }
+ }
+
+ return Styles;
+}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 779dead5d058d1a..13d7d1e048cf991 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -11,6 +11,7 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -1278,6 +1279,9 @@ void TextDiagnostic::emitSnippetAndCaret(
void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth,
unsigned LineNo) {
+ std::vector<StyleRange> Styles =
+ SnippetHighlighter.highlightLine(SourceLine, LangOpts);
+
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
@@ -1287,11 +1291,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
// Print the source line one character at a time.
bool PrintReversed = false;
+ bool HighlightingEnabled = DiagOpts->ShowColors;
size_t I = 0;
while (I < SourceLine.size()) {
auto [Str, WasPrintable] =
printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop);
+ // Just stop highlighting anything for this line if we found a non-printable
+ // character.
+ if (!WasPrintable)
+ HighlightingEnabled = false;
+
+ // FIXME: I hope we can do this in some nicer way.
+ if (HighlightingEnabled) {
+ std::optional<enum raw_ostream::Colors> H;
+ for (auto &P : Styles) {
+ if (P.Start < I && P.End >= I) {
+ H = P.c;
+ break;
+ }
+ }
+
+ if (H) {
+ OS.changeColor(*H, false);
+ } else
+ OS.resetColor();
+ }
+
// Toggle inverted colors on or off for this character.
if (DiagOpts->ShowColors) {
if (WasPrintable == PrintReversed) {
>From fb49561c11967f297e9f23c6cf294427a8d2a9e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 20 Sep 2023 15:28:10 +0200
Subject: [PATCH 02/28] Get identifier table from Preprocessor
---
.../clang/Frontend/CodeSnippetHighlighter.h | 11 +-
clang/include/clang/Frontend/TextDiagnostic.h | 7 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 101 +++++-------------
clang/lib/Frontend/TextDiagnostic.cpp | 8 +-
clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +-
5 files changed, 39 insertions(+), 90 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index 776954b59e2e1a8..ec03375221f9ffc 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -22,6 +22,8 @@ struct StyleRange {
const enum llvm::raw_ostream::Colors c;
};
+class Preprocessor;
+
class CodeSnippetHighlighter final {
public:
CodeSnippetHighlighter() = default;
@@ -30,15 +32,8 @@ class CodeSnippetHighlighter final {
/// The returned vector contains non-overlapping style ranges. They are sorted
/// from beginning of the line to the end.
std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+ const Preprocessor *PP,
const LangOptions &LangOpts);
-
-private:
- bool Initialized = false;
- /// Fills Keywords and Literals.
- void ensureTokenData();
-
- llvm::SmallSet<StringRef, 12> Keywords;
- llvm::SmallSet<StringRef, 12> Literals;
};
} // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 59fd4d4f9408d48..8cdb9b141a8a4af 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -19,7 +19,6 @@
#include "clang/Frontend/DiagnosticRenderer.h"
namespace clang {
-
/// Class to encapsulate the logic for formatting and printing a textual
/// diagnostic message.
///
@@ -34,12 +33,12 @@ namespace clang {
/// printing coming out of libclang.
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
+ const Preprocessor *PP;
CodeSnippetHighlighter SnippetHighlighter;
public:
- TextDiagnostic(raw_ostream &OS,
- const LangOptions &LangOpts,
- DiagnosticOptions *DiagOpts);
+ TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+ const Preprocessor *PP, DiagnosticOptions *DiagOpts);
~TextDiagnostic() override;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 829a533ad2692e5..63b3707fbb7ef83 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -3,59 +3,12 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
-void CodeSnippetHighlighter::ensureTokenData() {
- if (Initialized)
- return;
-
- // List of keywords, literals and types we want to highlight.
- // These are best-effort, as is everything we do wrt. highlighting.
- Keywords.insert("_Static_assert");
- Keywords.insert("auto");
- Keywords.insert("concept");
- Keywords.insert("const");
- Keywords.insert("consteval");
- Keywords.insert("constexpr");
- Keywords.insert("delete");
- Keywords.insert("do");
- Keywords.insert("else");
- Keywords.insert("final");
- Keywords.insert("for");
- Keywords.insert("if");
- Keywords.insert("mutable");
- Keywords.insert("namespace");
- Keywords.insert("new");
- Keywords.insert("private");
- Keywords.insert("public");
- Keywords.insert("requires");
- Keywords.insert("return");
- Keywords.insert("static");
- Keywords.insert("static_assert");
- Keywords.insert("using");
- Keywords.insert("void");
- Keywords.insert("volatile");
- Keywords.insert("while");
-
- // Builtin types we highlight
- Keywords.insert("void");
- Keywords.insert("char");
- Keywords.insert("short");
- Keywords.insert("int");
- Keywords.insert("unsigned");
- Keywords.insert("long");
- Keywords.insert("float");
- Keywords.insert("double");
-
- Literals.insert("true");
- Literals.insert("false");
- Literals.insert("nullptr");
-
- Initialized = true;
-}
-
static SourceManager createTempSourceManager() {
FileSystemOptions FileOpts;
FileManager FileMgr(FileOpts);
@@ -70,49 +23,51 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
}
-std::vector<StyleRange>
-CodeSnippetHighlighter::highlightLine(StringRef SourceLine,
- const LangOptions &LangOpts) {
- ensureTokenData();
-
+std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
+ StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
- const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
SourceManager FakeSM = createTempSourceManager();
+ const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
L.SetKeepWhitespaceMode(true);
std::vector<StyleRange> Styles;
bool Stop = false;
while (!Stop) {
- Token tok;
- Stop = L.LexFromRawLexer(tok);
- if (tok.is(tok::unknown))
+ Token T;
+ Stop = L.LexFromRawLexer(T);
+ if (T.is(tok::unknown))
continue;
bool Invalid;
unsigned Start =
- FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1;
+ FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
- if (tok.is(tok::raw_identifier)) {
- // Almost everything we lex is an identifier, since we use a raw lexer.
- // Some should be highlightes as literals, others as keywords.
- if (Keywords.contains(tok.getRawIdentifier()))
- Styles.push_back(
- StyleRange{Start, Start + tok.getLength(), KeywordColor});
- else if (Literals.contains(tok.getRawIdentifier()))
+ if (T.is(tok::raw_identifier)) {
+ StringRef RawIdent = T.getRawIdentifier();
+ // Special case true/false/nullptr literals, since they will otherwise be
+ // treated as keywords.
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
Styles.push_back(
- StyleRange{Start, Start + tok.getLength(), LiteralColor});
- } else if (tok::isLiteral(tok.getKind())) {
- Styles.push_back(
- StyleRange{Start, Start + tok.getLength(), LiteralColor});
- } else if (tok.is(tok::comment)) {
- Styles.push_back(
- StyleRange{Start, Start + tok.getLength(), CommentColor});
+ StyleRange{Start, Start + T.getLength(), LiteralColor});
+ } else {
+ const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+ assert(II);
+
+ if (II->isKeyword(LangOpts)) {
+ Styles.push_back(
+ StyleRange{Start, Start + T.getLength(), KeywordColor});
+ }
+ }
+ } else if (tok::isLiteral(T.getKind())) {
+ Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor});
+ } else if (T.is(tok::comment)) {
+ Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor});
}
}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 13d7d1e048cf991..e840cdd952d09f1 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -645,10 +645,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
return Wrapped;
}
-TextDiagnostic::TextDiagnostic(raw_ostream &OS,
- const LangOptions &LangOpts,
+TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+ const Preprocessor *PP,
DiagnosticOptions *DiagOpts)
- : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
+ : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
TextDiagnostic::~TextDiagnostic() {}
@@ -1280,7 +1280,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth,
unsigned LineNo) {
std::vector<StyleRange> Styles =
- SnippetHighlighter.highlightLine(SourceLine, LangOpts);
+ SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts);
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 0ff5376098ffe8d..3bc3935078baada 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
const Preprocessor *PP) {
// Build the TextDiagnostic utility.
- TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts));
+ TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts));
}
void TextDiagnosticPrinter::EndSourceFile() {
>From 7c2dde35c6c569d7b9da388708288f685f47b2d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 20 Sep 2023 17:24:42 +0200
Subject: [PATCH 03/28] Move the PP parameter to the end of the TextDiagnostic
ctor
---
clang/include/clang/Frontend/TextDiagnostic.h | 2 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 ++
clang/lib/Frontend/TextDiagnostic.cpp | 4 ++--
clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +-
4 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 8cdb9b141a8a4af..43c39ff96a2d1ce 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -38,7 +38,7 @@ class TextDiagnostic : public DiagnosticRenderer {
public:
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
- const Preprocessor *PP, DiagnosticOptions *DiagOpts);
+ DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr);
~TextDiagnostic() override;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 63b3707fbb7ef83..32bd61f3746023c 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -25,6 +25,8 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
+ if (!PP)
+ return {};
constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index e840cdd952d09f1..c9207e9dfbf9217 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -646,8 +646,8 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
}
TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
- const Preprocessor *PP,
- DiagnosticOptions *DiagOpts)
+ DiagnosticOptions *DiagOpts,
+ const Preprocessor *PP)
: DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
TextDiagnostic::~TextDiagnostic() {}
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 3bc3935078baada..b2fb762537573ef 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
const Preprocessor *PP) {
// Build the TextDiagnostic utility.
- TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts));
+ TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP));
}
void TextDiagnosticPrinter::EndSourceFile() {
>From 66ddb59b9978b4f3c0552c2c16c04466390cd3a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 21 Sep 2023 06:38:24 +0200
Subject: [PATCH 04/28] Tune colors
---
clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 6 +++---
clang/lib/Frontend/TextDiagnostic.cpp | 7 ++++---
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index ec03375221f9ffc..c2a0184085d5da4 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -19,7 +19,7 @@ namespace clang {
struct StyleRange {
unsigned Start;
unsigned End;
- const enum llvm::raw_ostream::Colors c;
+ const enum llvm::raw_ostream::Colors color;
};
class Preprocessor;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 32bd61f3746023c..dba7f5d2848505a 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -27,9 +27,9 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
if (!PP)
return {};
- constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
- constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
- constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
+ constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
+ constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
+ constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
SourceManager FakeSM = createTempSourceManager();
const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index c9207e9dfbf9217..35a92a8044f2e52 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <optional>
@@ -1307,14 +1308,14 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
std::optional<enum raw_ostream::Colors> H;
for (auto &P : Styles) {
if (P.Start < I && P.End >= I) {
- H = P.c;
+ H = P.color;
break;
}
}
- if (H) {
+ if (H)
OS.changeColor(*H, false);
- } else
+ else
OS.resetColor();
}
>From 3dd87edb69c38b23d27fc0dc2651e9492badef65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 21 Sep 2023 11:01:43 +0200
Subject: [PATCH 05/28] Lex the entire file
---
.../clang/Frontend/CodeSnippetHighlighter.h | 7 +-
clang/include/clang/Frontend/TextDiagnostic.h | 3 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 138 ++++++++++++------
clang/lib/Frontend/TextDiagnostic.cpp | 14 +-
4 files changed, 111 insertions(+), 51 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index c2a0184085d5da4..51c14880fb95485 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -23,6 +23,8 @@ struct StyleRange {
};
class Preprocessor;
+class FileID;
+class SourceManager;
class CodeSnippetHighlighter final {
public:
@@ -31,9 +33,10 @@ class CodeSnippetHighlighter final {
/// Produce StyleRanges for the given line.
/// The returned vector contains non-overlapping style ranges. They are sorted
/// from beginning of the line to the end.
- std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+ std::vector<StyleRange> highlightLine(unsigned LineNumber,
const Preprocessor *PP,
- const LangOptions &LangOpts);
+ const LangOptions &LangOpts, FileID FID,
+ const SourceManager &SM);
};
} // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 43c39ff96a2d1ce..102b33aedd5ef98 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -105,7 +105,8 @@ class TextDiagnostic : public DiagnosticRenderer {
ArrayRef<FixItHint> Hints);
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
- unsigned LineNo);
+ FileID FID, const SourceManager &SM, unsigned LineNo,
+ unsigned DisplayLineNo);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index dba7f5d2848505a..d319e690a355b2b 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -9,34 +9,47 @@
using namespace clang;
-static SourceManager createTempSourceManager() {
- FileSystemOptions FileOpts;
- FileManager FileMgr(FileOpts);
- llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
- llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
- DiagnosticsEngine diags(DiagIDs, DiagOpts);
- return SourceManager(diags, FileMgr);
-}
-
-static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
- const LangOptions &LangOpts) {
- return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
-}
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
- StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
+ unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
+ FileID FID, const SourceManager &SM) {
if (!PP)
return {};
- constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
- constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
- constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
- SourceManager FakeSM = createTempSourceManager();
- const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
- Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
+ // Classify the given token and append it to the given vector.
+ auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec,
+ const Token &T, unsigned Start,
+ unsigned Length) -> void {
+ if (T.is(tok::raw_identifier)) {
+ StringRef RawIdent = T.getRawIdentifier();
+ // Special case true/false/nullptr literals, since they will otherwise be
+ // treated as keywords.
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
+ Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
+ } else {
+ const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+ assert(II);
+
+ if (II->isKeyword(LangOpts)) {
+ Vec.push_back(StyleRange{Start, Start + Length, KeywordColor});
+ }
+ }
+ } else if (tok::isLiteral(T.getKind())) {
+ Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
+ } else if (T.is(tok::comment)) {
+ Vec.push_back(StyleRange{Start, Start + Length, CommentColor});
+ }
+ };
+
+ auto Buff = SM.getBufferOrNone(FID);
+ assert(Buff);
+ Lexer L = Lexer(FID, *Buff, SM, LangOpts);
L.SetKeepWhitespaceMode(true);
+ std::vector<std::vector<StyleRange>> Lines;
- std::vector<StyleRange> Styles;
bool Stop = false;
while (!Stop) {
Token T;
@@ -45,33 +58,74 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
continue;
bool Invalid;
- unsigned Start =
- FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ unsigned StartCol =
+ SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+ unsigned StartLine =
+ SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
- if (T.is(tok::raw_identifier)) {
- StringRef RawIdent = T.getRawIdentifier();
- // Special case true/false/nullptr literals, since they will otherwise be
- // treated as keywords.
- if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
- Styles.push_back(
- StyleRange{Start, Start + T.getLength(), LiteralColor});
- } else {
- const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
- assert(II);
+ while (Lines.size() <= StartLine)
+ Lines.push_back({});
- if (II->isKeyword(LangOpts)) {
- Styles.push_back(
- StyleRange{Start, Start + T.getLength(), KeywordColor});
- }
+ unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ // Simple tokens.
+ if (StartLine == EndLine) {
+ appendStyle(Lines[StartLine], T, StartCol, T.getLength());
+ continue;
+ }
+ unsigned NumLines = EndLine - StartLine;
+
+ // For tokens that span multiple lines (think multiline comments), we
+ // divide them into multiple StyleRanges.
+ unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
+
+ unsigned L = 0;
+ unsigned LineLength = 0;
+ for (unsigned I = 0; I <= Spelling.size(); ++I) {
+ // This line is done.
+ if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) {
+ while (Lines.size() <= StartLine + L)
+ Lines.push_back({});
+
+ if (L == 0) // First line
+ appendStyle(Lines[StartLine + L], T, StartCol, LineLength);
+ else if (L == NumLines) // Last line
+ appendStyle(Lines[StartLine + L], T, 0, EndCol);
+ else
+ appendStyle(Lines[StartLine + L], T, 0, LineLength);
+ ++L;
+ LineLength = 0;
+ continue;
}
- } else if (tok::isLiteral(T.getKind())) {
- Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor});
- } else if (T.is(tok::comment)) {
- Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor});
+ ++LineLength;
+ }
+ }
+
+#if 0
+ llvm::errs() << "--\nLine Style info: \n";
+ int I = 0;
+ for (std::vector<StyleRange> &Line : Lines) {
+ llvm::errs() << I << ": ";
+ for (const auto &R : Line) {
+ llvm::errs() << "{" << R.Start << ", " << R.End << "}, ";
}
+ llvm::errs() << "\n";
+
+ ++I;
}
+#endif
- return Styles;
+ while (Lines.size() <= LineNumber)
+ Lines.push_back({});
+ return Lines[LineNumber];
}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 35a92a8044f2e52..5aea7b8f4210749 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1249,7 +1249,8 @@ void TextDiagnostic::emitSnippetAndCaret(
}
// Emit what we have computed.
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
+ emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo,
+ DisplayLineNo);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1278,16 +1279,17 @@ void TextDiagnostic::emitSnippetAndCaret(
}
void TextDiagnostic::emitSnippet(StringRef SourceLine,
- unsigned MaxLineNoDisplayWidth,
- unsigned LineNo) {
+ unsigned MaxLineNoDisplayWidth, FileID FID,
+ const SourceManager &SM, unsigned LineNo,
+ unsigned DisplayLineNo) {
std::vector<StyleRange> Styles =
- SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts);
+ SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM);
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
- unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
+ unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
- << LineNo << " | ";
+ << DisplayLineNo << " | ";
}
// Print the source line one character at a time.
>From c3ac956e8956675bf5b438d5a56f2e096fa69242 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 22 Sep 2023 06:48:55 +0200
Subject: [PATCH 06/28] Try to fix PCH test
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index d319e690a355b2b..8905fbfb29b8927 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -19,6 +19,10 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (!PP)
return {};
+ // Might cause emission of another diagnostic.
+ if (PP->getIdentifierTable().getExternalIdentifierLookup())
+ return {};
+
// Classify the given token and append it to the given vector.
auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec,
const Token &T, unsigned Start,
>From ee6212d6a25bb6d5aaf5e3c7a32f473346da8c11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 26 Sep 2023 08:11:58 +0200
Subject: [PATCH 07/28] Measurements
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 24 +++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 8905fbfb29b8927..17614a962ee6a7f 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -6,6 +6,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/Support/raw_ostream.h"
+#include <chrono>
using namespace clang;
@@ -16,6 +17,9 @@ static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
FileID FID, const SourceManager &SM) {
+ std::chrono::steady_clock::time_point begin =
+ std::chrono::steady_clock::now();
+
if (!PP)
return {};
@@ -23,6 +27,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (PP->getIdentifierTable().getExternalIdentifierLookup())
return {};
+ size_t NTokens = 0;
// Classify the given token and append it to the given vector.
auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec,
const Token &T, unsigned Start,
@@ -56,6 +61,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
bool Stop = false;
while (!Stop) {
+ ++NTokens;
Token T;
Stop = L.LexFromRawLexer(T);
if (T.is(tok::unknown))
@@ -131,5 +137,23 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
while (Lines.size() <= LineNumber)
Lines.push_back({});
+
+ std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
+ llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens
+ << " Tokens\n";
+ llvm::errs() << "That took "
+ << std::chrono::duration_cast<std::chrono::microseconds>(end -
+ begin)
+ .count()
+ << " microseconds\n";
+ llvm::errs() << "That took "
+ << std::chrono::duration_cast<std::chrono::milliseconds>(end -
+ begin)
+ .count()
+ << " milliseconds\n";
+ llvm::errs()
+ << "That took "
+ << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
+ << " seconds\n";
return Lines[LineNumber];
}
>From 29340454cabc61090e63cf38dd5ab32eb8059ea0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 27 Sep 2023 08:05:09 +0200
Subject: [PATCH 08/28] Slightly improve performance by bailing out earlier
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 17614a962ee6a7f..71e5c30e56d146a 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -41,14 +41,13 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
} else {
const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
assert(II);
-
- if (II->isKeyword(LangOpts)) {
+ if (II->isKeyword(LangOpts))
Vec.push_back(StyleRange{Start, Start + Length, KeywordColor});
- }
}
} else if (tok::isLiteral(T.getKind())) {
Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
- } else if (T.is(tok::comment)) {
+ } else {
+ assert(T.is(tok::comment));
Vec.push_back(StyleRange{Start, Start + Length, CommentColor});
}
};
@@ -67,6 +66,11 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (T.is(tok::unknown))
continue;
+ // We are only interested in identifiers, literals and comments.
+ if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
+ !tok::isLiteral(T.getKind()))
+ continue;
+
bool Invalid;
unsigned StartCol =
SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
@@ -138,6 +142,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
while (Lines.size() <= LineNumber)
Lines.push_back({});
+#if 0
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens
<< " Tokens\n";
@@ -155,5 +160,6 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
<< "That took "
<< std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
<< " seconds\n";
+#endif
return Lines[LineNumber];
}
>From 13568253ba9833cf1906f71f1ba613f85cc6ea75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 27 Sep 2023 10:45:36 +0200
Subject: [PATCH 09/28] Only care about tokens that touch our LineNumber.
---
.../clang/Frontend/CodeSnippetHighlighter.h | 2 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 19 +++++++++++++------
clang/lib/Frontend/TextDiagnostic.cpp | 2 +-
3 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index 51c14880fb95485..a65bd3991d4eff2 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -19,7 +19,7 @@ namespace clang {
struct StyleRange {
unsigned Start;
unsigned End;
- const enum llvm::raw_ostream::Colors color;
+ const enum llvm::raw_ostream::Colors Color;
};
class Preprocessor;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 71e5c30e56d146a..7663155c6c83923 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -71,23 +71,30 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
!tok::isLiteral(T.getKind()))
continue;
- bool Invalid;
- unsigned StartCol =
- SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ bool Invalid = false;
+ unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1;
if (Invalid)
continue;
+
+ if (EndLine < LineNumber)
+ continue;
unsigned StartLine =
SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
+ if (StartLine > LineNumber)
+ break;
- while (Lines.size() <= StartLine)
- Lines.push_back({});
+ // Must have an intersection at this point
+ assert(StartLine <= LineNumber && EndLine >= LineNumber);
- unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1;
+ unsigned StartCol =
+ SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
+ while (Lines.size() <= StartLine)
+ Lines.push_back({});
// Simple tokens.
if (StartLine == EndLine) {
appendStyle(Lines[StartLine], T, StartCol, T.getLength());
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 5aea7b8f4210749..f2793d23522f1a5 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1310,7 +1310,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
std::optional<enum raw_ostream::Colors> H;
for (auto &P : Styles) {
if (P.Start < I && P.End >= I) {
- H = P.color;
+ H = P.Color;
break;
}
}
>From c4822243ddd95da600dc86495a59fba32e6b313d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 6 Oct 2023 15:28:25 +0200
Subject: [PATCH 10/28] Add checkpoints to Preprocessor
---
.../clang/Frontend/CodeSnippetHighlighter.h | 13 ++--
clang/include/clang/Frontend/TextDiagnostic.h | 2 +-
clang/include/clang/Lex/Preprocessor.h | 5 ++
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 64 +++++++++++--------
clang/lib/Frontend/TextDiagnostic.cpp | 10 +--
clang/lib/Lex/Preprocessor.cpp | 27 ++++++++
6 files changed, 81 insertions(+), 40 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index a65bd3991d4eff2..451a182b3e35317 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -10,16 +10,15 @@
#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
#include "clang/Basic/LangOptions.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
namespace clang {
struct StyleRange {
unsigned Start;
unsigned End;
- const enum llvm::raw_ostream::Colors Color;
+ enum llvm::raw_ostream::Colors Color;
};
class Preprocessor;
@@ -33,10 +32,10 @@ class CodeSnippetHighlighter final {
/// Produce StyleRanges for the given line.
/// The returned vector contains non-overlapping style ranges. They are sorted
/// from beginning of the line to the end.
- std::vector<StyleRange> highlightLine(unsigned LineNumber,
- const Preprocessor *PP,
- const LangOptions &LangOpts, FileID FID,
- const SourceManager &SM);
+ llvm::SmallVector<StyleRange>
+ highlightLine(unsigned LineNumber, const Preprocessor *PP,
+ const LangOptions &LangOpts, FileID FID,
+ const SourceManager &SM, const char *LineStart);
};
} // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 102b33aedd5ef98..ecd5bb4a4f568dc 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -106,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer {
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
FileID FID, const SourceManager &SM, unsigned LineNo,
- unsigned DisplayLineNo);
+ unsigned DisplayLineNo, const char *LineStart);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4ec21a8b6be2c85..07c44794520f667 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -128,6 +128,7 @@ enum MacroUse {
class Preprocessor {
friend class VAOptDefinitionContext;
friend class VariadicMacroScopeGuard;
+ friend class CodeSnippetHighlighter;
llvm::unique_function<void(const clang::Token &)> OnToken;
std::shared_ptr<PreprocessorOptions> PPOpts;
@@ -141,6 +142,10 @@ class Preprocessor {
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
+ llvm::SmallVector<const char *> CheckPoints;
+ void saveCheckPoint(const char *P);
+ const char *getSaveFor(const char *S) const;
+
/// External source of macros.
ExternalPreprocessorSource *ExternalSource;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 7663155c6c83923..28b66d4c05b8c8b 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -14,9 +14,9 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
-std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
+llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
- FileID FID, const SourceManager &SM) {
+ FileID FID, const SourceManager &SM, const char *LineStart) {
std::chrono::steady_clock::time_point begin =
std::chrono::steady_clock::now();
@@ -29,7 +29,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
size_t NTokens = 0;
// Classify the given token and append it to the given vector.
- auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec,
+ auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec,
const Token &T, unsigned Start,
unsigned Length) -> void {
if (T.is(tok::raw_identifier)) {
@@ -52,12 +52,23 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
}
};
+ // Figure out where to start lexing from.
auto Buff = SM.getBufferOrNone(FID);
assert(Buff);
Lexer L = Lexer(FID, *Buff, SM, LangOpts);
L.SetKeepWhitespaceMode(true);
- std::vector<std::vector<StyleRange>> Lines;
+ // Seek to the last save point before the start of the line.
+ if (const char *Save = PP->getSaveFor(LineStart);
+ Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) {
+ size_t Offset = Save - Buff->getBufferStart();
+ assert(Save >= Buff->getBufferStart());
+ assert(Save <= Buff->getBufferEnd());
+
+ L.seek(Offset, /*IsAtStartOfLine=*/true);
+ }
+
+ llvm::SmallVector<StyleRange> LineRanges;
bool Stop = false;
while (!Stop) {
++NTokens;
@@ -93,14 +104,13 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (Invalid)
continue;
- while (Lines.size() <= StartLine)
- Lines.push_back({});
// Simple tokens.
if (StartLine == EndLine) {
- appendStyle(Lines[StartLine], T, StartCol, T.getLength());
+ appendStyle(LineRanges, T, StartCol, T.getLength());
continue;
}
unsigned NumLines = EndLine - StartLine;
+ assert(NumLines >= 1);
// For tokens that span multiple lines (think multiline comments), we
// divide them into multiple StyleRanges.
@@ -115,15 +125,17 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
for (unsigned I = 0; I <= Spelling.size(); ++I) {
// This line is done.
if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) {
- while (Lines.size() <= StartLine + L)
- Lines.push_back({});
-
- if (L == 0) // First line
- appendStyle(Lines[StartLine + L], T, StartCol, LineLength);
- else if (L == NumLines) // Last line
- appendStyle(Lines[StartLine + L], T, 0, EndCol);
- else
- appendStyle(Lines[StartLine + L], T, 0, LineLength);
+ if (StartLine + L == LineNumber) {
+ if (L == 0) // First line
+ appendStyle(LineRanges, T, StartCol, LineLength);
+ else if (L == NumLines) // Last line
+ appendStyle(LineRanges, T, 0, EndCol);
+ else
+ appendStyle(LineRanges, T, 0, LineLength);
+
+ // We only do one line, so we're done.
+ break;
+ }
++L;
LineLength = 0;
continue;
@@ -134,25 +146,21 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
#if 0
llvm::errs() << "--\nLine Style info: \n";
- int I = 0;
- for (std::vector<StyleRange> &Line : Lines) {
- llvm::errs() << I << ": ";
- for (const auto &R : Line) {
+ //int I = 0;
+ //for (std::vector<StyleRange> &Line : Lines) {
+ //llvm::errs() << I << ": ";
+ for (const auto &R : LineRanges) {
llvm::errs() << "{" << R.Start << ", " << R.End << "}, ";
}
llvm::errs() << "\n";
- ++I;
- }
+ //++I;
+ //}
#endif
- while (Lines.size() <= LineNumber)
- Lines.push_back({});
-
#if 0
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
- llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens
- << " Tokens\n";
+ llvm::errs() << "Lexed " << NTokens << " Tokens\n";
llvm::errs() << "That took "
<< std::chrono::duration_cast<std::chrono::microseconds>(end -
begin)
@@ -168,5 +176,5 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
<< std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
<< " seconds\n";
#endif
- return Lines[LineNumber];
+ return LineRanges;
}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index f2793d23522f1a5..cbc0cfacec20f0e 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -13,6 +13,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
@@ -1250,7 +1251,7 @@ void TextDiagnostic::emitSnippetAndCaret(
// Emit what we have computed.
emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo,
- DisplayLineNo);
+ DisplayLineNo, LineStart);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1281,9 +1282,10 @@ void TextDiagnostic::emitSnippetAndCaret(
void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth, FileID FID,
const SourceManager &SM, unsigned LineNo,
- unsigned DisplayLineNo) {
- std::vector<StyleRange> Styles =
- SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM);
+ unsigned DisplayLineNo,
+ const char *LineStart) {
+ llvm::SmallVector<StyleRange> Styles = SnippetHighlighter.highlightLine(
+ LineNo - 1, PP, LangOpts, FID, SM, LineStart);
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 64f54c6fc6382f2..d865326bcfa6dda 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -546,6 +546,7 @@ void Preprocessor::EnterMainSourceFile() {
// information) and predefined macros aren't guaranteed to be set properly.
assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
FileID MainFileID = SourceMgr.getMainFileID();
+ // llvm::errs() << "##### Main source file: " << (int)MainFileID << "\n";
// If MainFileID is loaded it means we loaded an AST file, no need to enter
// a main file.
@@ -862,6 +863,32 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
return true;
}
+void Preprocessor::saveCheckPoint(const char *P) {
+ static constexpr ptrdiff_t Limit = 1000;
+ if (CheckPoints.empty()) {
+ CheckPoints.push_back(P);
+ return;
+ }
+
+ const char *Cur = CheckPoints.back();
+ if (Cur == P)
+ return;
+ if ((P - Cur) > Limit)
+ CheckPoints.push_back(P);
+}
+
+const char *Preprocessor::getSaveFor(const char *S) const {
+ const char *C = S;
+ // FIXME: Use std::lower_bound or something smart. Aaron knows what I'm
+ // talking about.
+ for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) {
+ C = CheckPoints[I];
+ if (CheckPoints[I] <= S)
+ break;
+ }
+ return C;
+}
+
void Preprocessor::Lex(Token &Result) {
++LexLevel;
>From 51001fffd8f42ccd2b0225780435503b6450e46c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sat, 7 Oct 2023 12:35:21 +0200
Subject: [PATCH 11/28] Add missing license header
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 28b66d4c05b8c8b..042745fc639ec39 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -1,3 +1,10 @@
+//===-- CodeSnippetHighlighter.cpp - Code snippet highlighting --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Basic/DiagnosticOptions.h"
>From 3b4487d81cb79553e1d55a79d8e145a8f0d82e03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 10 Oct 2023 14:04:43 +0200
Subject: [PATCH 12/28] Fewer checkpoints
---
clang/lib/Lex/Preprocessor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index d865326bcfa6dda..f5366d04a09e5ad 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -864,7 +864,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
}
void Preprocessor::saveCheckPoint(const char *P) {
- static constexpr ptrdiff_t Limit = 1000;
+ static constexpr ptrdiff_t Limit = 1024 * 8;
if (CheckPoints.empty()) {
CheckPoints.push_back(P);
return;
>From 11c1d838a5f8c0f5cf51423918d01d834032f4ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 13 Oct 2023 11:11:06 +0200
Subject: [PATCH 13/28] Cleanup
---
clang/lib/Lex/Preprocessor.cpp | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index f5366d04a09e5ad..bc57cd663d04fd8 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -546,7 +546,6 @@ void Preprocessor::EnterMainSourceFile() {
// information) and predefined macros aren't guaranteed to be set properly.
assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
FileID MainFileID = SourceMgr.getMainFileID();
- // llvm::errs() << "##### Main source file: " << (int)MainFileID << "\n";
// If MainFileID is loaded it means we loaded an AST file, no need to enter
// a main file.
@@ -878,15 +877,8 @@ void Preprocessor::saveCheckPoint(const char *P) {
}
const char *Preprocessor::getSaveFor(const char *S) const {
- const char *C = S;
- // FIXME: Use std::lower_bound or something smart. Aaron knows what I'm
- // talking about.
- for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) {
- C = CheckPoints[I];
- if (CheckPoints[I] <= S)
- break;
- }
- return C;
+ auto It = llvm::lower_bound(CheckPoints, S, std::less<const char *>());
+ return *It;
}
void Preprocessor::Lex(Token &Result) {
>From 6f724d08c00e781e0867ef1956335874851baf12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 15 Oct 2023 17:00:36 +0200
Subject: [PATCH 14/28] Address some review comments
---
clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 ++
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 9 +++++----
clang/lib/Frontend/TextDiagnostic.cpp | 1 -
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index 451a182b3e35317..cb3c96f69293795 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -19,6 +19,8 @@ struct StyleRange {
unsigned Start;
unsigned End;
enum llvm::raw_ostream::Colors Color;
+ StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
+ : Start(S), End(E), Color(C){};
};
class Preprocessor;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 042745fc639ec39..30c4c791cb4f824 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Frontend/CodeSnippetHighlighter.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
@@ -49,13 +50,13 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
assert(II);
if (II->isKeyword(LangOpts))
- Vec.push_back(StyleRange{Start, Start + Length, KeywordColor});
+ Vec.emplace_back(Start, Start + Length, KeywordColor);
}
} else if (tok::isLiteral(T.getKind())) {
- Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
} else {
assert(T.is(tok::comment));
- Vec.push_back(StyleRange{Start, Start + Length, CommentColor});
+ Vec.emplace_back(Start, Start + Length, CommentColor);
}
};
@@ -131,7 +132,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineLength = 0;
for (unsigned I = 0; I <= Spelling.size(); ++I) {
// This line is done.
- if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) {
+ if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
if (StartLine + L == LineNumber) {
if (L == 0) // First line
appendStyle(LineRanges, T, StartCol, LineLength);
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index cbc0cfacec20f0e..033d21656b12724 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <optional>
>From 681d129d358df87bb6ac6d201f38429d0e0c71f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Mon, 16 Oct 2023 07:21:41 +0200
Subject: [PATCH 15/28] Fix highlighting and add another assertion
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 1 +
clang/lib/Lex/Preprocessor.cpp | 14 ++++++++++++--
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 30c4c791cb4f824..7a3fdc1e0d16ea8 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -72,6 +72,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
size_t Offset = Save - Buff->getBufferStart();
assert(Save >= Buff->getBufferStart());
assert(Save <= Buff->getBufferEnd());
+ assert(Save <= LineStart);
L.seek(Offset, /*IsAtStartOfLine=*/true);
}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index bc57cd663d04fd8..bc8302d7fb9489a 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -876,9 +876,19 @@ void Preprocessor::saveCheckPoint(const char *P) {
CheckPoints.push_back(P);
}
+/// We want to always return a value lower than \p S.
+/// If there is no such checkpoint, return nullptr.
const char *Preprocessor::getSaveFor(const char *S) const {
- auto It = llvm::lower_bound(CheckPoints, S, std::less<const char *>());
- return *It;
+ const char *Result = nullptr;
+ for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) {
+ const char *C = CheckPoints[I];
+ if (C <= S) {
+ Result = C;
+ break;
+ }
+ }
+
+ return Result;
}
void Preprocessor::Lex(Token &Result) {
>From 62c9e29a1eae59ea6a8927cf81dc212639d47c16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Mon, 16 Oct 2023 07:51:10 +0200
Subject: [PATCH 16/28] Change colors one last time
To match those used in LLDB
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 7a3fdc1e0d16ea8..316a151c9943c90 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -18,9 +18,9 @@
using namespace clang;
-static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
-static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
-static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::GREEN;
llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
@@ -45,7 +45,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
// Special case true/false/nullptr literals, since they will otherwise be
// treated as keywords.
if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
- Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
} else {
const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
assert(II);
>From 47dc64d00d5ffecd1b9726716c4d7148a28e7f41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 17 Oct 2023 06:13:12 +0200
Subject: [PATCH 17/28] Address review comments
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +-
clang/lib/Frontend/TextDiagnostic.cpp | 15 +++++----------
clang/lib/Lex/Preprocessor.cpp | 16 ++++++----------
3 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 316a151c9943c90..715c113d519438d 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -20,7 +20,7 @@ using namespace clang;
static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA;
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED;
-static constexpr raw_ostream::Colors KeywordColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 033d21656b12724..1378b3a0812e598 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1306,18 +1306,13 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
if (!WasPrintable)
HighlightingEnabled = false;
- // FIXME: I hope we can do this in some nicer way.
if (HighlightingEnabled) {
- std::optional<enum raw_ostream::Colors> H;
- for (auto &P : Styles) {
- if (P.Start < I && P.End >= I) {
- H = P.Color;
- break;
- }
- }
+ const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) {
+ return (R.Start < I && R.End >= I);
+ });
- if (H)
- OS.changeColor(*H, false);
+ if (CharStyle != Styles.end())
+ OS.changeColor(CharStyle->Color, false);
else
OS.resetColor();
}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index bc8302d7fb9489a..c7ee33e7b31bd06 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -163,6 +163,8 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
PreambleConditionalStack.startRecording();
MaxTokens = LangOpts.MaxTokens;
+
+ CheckPoints.push_back(nullptr);
}
Preprocessor::~Preprocessor() {
@@ -862,17 +864,11 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
return true;
}
+static constexpr ptrdiff_t CheckPointLimit = 1024 * 8;
void Preprocessor::saveCheckPoint(const char *P) {
- static constexpr ptrdiff_t Limit = 1024 * 8;
- if (CheckPoints.empty()) {
- CheckPoints.push_back(P);
- return;
- }
-
- const char *Cur = CheckPoints.back();
- if (Cur == P)
- return;
- if ((P - Cur) > Limit)
+ assert(!CheckPoints.empty());
+ assert(CheckPoints.back() != P);
+ if ((P - CheckPoints.back()) > CheckPointLimit)
CheckPoints.push_back(P);
}
>From 676a68fc9cc6c0788c2b65a009d4bcd176f49afd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 17 Oct 2023 07:44:11 +0200
Subject: [PATCH 18/28] Rename lexer API
---
clang/include/clang/Lex/Preprocessor.h | 9 ++++++---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +-
clang/lib/Lex/Preprocessor.cpp | 4 ++--
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 07c44794520f667..05bf87d584b8db4 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -141,10 +141,7 @@ class Preprocessor {
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
-
llvm::SmallVector<const char *> CheckPoints;
- void saveCheckPoint(const char *P);
- const char *getSaveFor(const char *S) const;
/// External source of macros.
ExternalPreprocessorSource *ExternalSource;
@@ -1323,6 +1320,11 @@ class Preprocessor {
OnToken = std::move(F);
}
+ /// Returns a pointer into the main file's buffer that's guaranteed to be
+ /// after a fully lexed token. This can be used to partially lex a file
+ /// without starting in the middle of a token.
+ const char *getCompleteTokenCheckpoint(const char *P) const;
+
void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
bool isMacroDefined(StringRef Id) {
@@ -2263,6 +2265,7 @@ class Preprocessor {
const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
+ void saveCheckPoint(const char *P);
public:
void PoisonSEHIdentifiers(bool Poison = true); // Borland
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 715c113d519438d..1292469f80a0734 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -67,7 +67,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
L.SetKeepWhitespaceMode(true);
// Seek to the last save point before the start of the line.
- if (const char *Save = PP->getSaveFor(LineStart);
+ if (const char *Save = PP->getCompleteTokenCheckpoint(LineStart);
Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) {
size_t Offset = Save - Buff->getBufferStart();
assert(Save >= Buff->getBufferStart());
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c7ee33e7b31bd06..7dc96e686d0619d 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -874,11 +874,11 @@ void Preprocessor::saveCheckPoint(const char *P) {
/// We want to always return a value lower than \p S.
/// If there is no such checkpoint, return nullptr.
-const char *Preprocessor::getSaveFor(const char *S) const {
+const char *Preprocessor::getCompleteTokenCheckpoint(const char *P) const {
const char *Result = nullptr;
for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) {
const char *C = CheckPoints[I];
- if (C <= S) {
+ if (C <= P) {
Result = C;
break;
}
>From 1e67df04a8da5ba10baf341217a8b4ef3cede1d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 20 Oct 2023 09:43:46 +0200
Subject: [PATCH 19/28] Just don't highlight in files >1MB
---
clang/include/clang/Lex/Preprocessor.h | 7 ----
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 34 ++++++++-----------
clang/lib/Lex/Preprocessor.cpp | 25 --------------
3 files changed, 15 insertions(+), 51 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 05bf87d584b8db4..b1c2807e35a3149 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -141,7 +141,6 @@ class Preprocessor {
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
- llvm::SmallVector<const char *> CheckPoints;
/// External source of macros.
ExternalPreprocessorSource *ExternalSource;
@@ -1320,11 +1319,6 @@ class Preprocessor {
OnToken = std::move(F);
}
- /// Returns a pointer into the main file's buffer that's guaranteed to be
- /// after a fully lexed token. This can be used to partially lex a file
- /// without starting in the middle of a token.
- const char *getCompleteTokenCheckpoint(const char *P) const;
-
void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
bool isMacroDefined(StringRef Id) {
@@ -2265,7 +2259,6 @@ class Preprocessor {
const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
- void saveCheckPoint(const char *P);
public:
void PoisonSEHIdentifiers(bool Poison = true); // Borland
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 1292469f80a0734..a1ca68227323469 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -18,9 +18,15 @@
using namespace clang;
-static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA;
-static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED;
+// Magenta is taken for 'warning'. Red is already 'error' and 'cya'
+// is already taken for 'note'. Green is already used to underline
+// source ranges. White and black are bad because of the usual
+// terminal backgrounds. Which leaves us only with TWO options.
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
+/// Maximum size of file we still highlight.
+static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB.
llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
@@ -35,6 +41,13 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (PP->getIdentifierTable().getExternalIdentifierLookup())
return {};
+ auto Buff = SM.getBufferOrNone(FID);
+ if (!Buff || Buff->getBufferSize() > MaxBufferSize)
+ return {};
+
+ Lexer L = Lexer(FID, *Buff, SM, LangOpts);
+ L.SetKeepWhitespaceMode(true);
+
size_t NTokens = 0;
// Classify the given token and append it to the given vector.
auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec,
@@ -60,23 +73,6 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
}
};
- // Figure out where to start lexing from.
- auto Buff = SM.getBufferOrNone(FID);
- assert(Buff);
- Lexer L = Lexer(FID, *Buff, SM, LangOpts);
- L.SetKeepWhitespaceMode(true);
-
- // Seek to the last save point before the start of the line.
- if (const char *Save = PP->getCompleteTokenCheckpoint(LineStart);
- Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) {
- size_t Offset = Save - Buff->getBufferStart();
- assert(Save >= Buff->getBufferStart());
- assert(Save <= Buff->getBufferEnd());
- assert(Save <= LineStart);
-
- L.seek(Offset, /*IsAtStartOfLine=*/true);
- }
-
llvm::SmallVector<StyleRange> LineRanges;
bool Stop = false;
while (!Stop) {
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 7dc96e686d0619d..64f54c6fc6382f2 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -163,8 +163,6 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
PreambleConditionalStack.startRecording();
MaxTokens = LangOpts.MaxTokens;
-
- CheckPoints.push_back(nullptr);
}
Preprocessor::~Preprocessor() {
@@ -864,29 +862,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
return true;
}
-static constexpr ptrdiff_t CheckPointLimit = 1024 * 8;
-void Preprocessor::saveCheckPoint(const char *P) {
- assert(!CheckPoints.empty());
- assert(CheckPoints.back() != P);
- if ((P - CheckPoints.back()) > CheckPointLimit)
- CheckPoints.push_back(P);
-}
-
-/// We want to always return a value lower than \p S.
-/// If there is no such checkpoint, return nullptr.
-const char *Preprocessor::getCompleteTokenCheckpoint(const char *P) const {
- const char *Result = nullptr;
- for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) {
- const char *C = CheckPoints[I];
- if (C <= P) {
- Result = C;
- break;
- }
- }
-
- return Result;
-}
-
void Preprocessor::Lex(Token &Result) {
++LexLevel;
>From 8e80693c1d60428206ab90c028979a228ba202b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 29 Oct 2023 09:22:47 +0100
Subject: [PATCH 20/28] Fix a typo
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index a1ca68227323469..90ab5a4927efb39 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -18,7 +18,7 @@
using namespace clang;
-// Magenta is taken for 'warning'. Red is already 'error' and 'cya'
+// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
// is already taken for 'note'. Green is already used to underline
// source ranges. White and black are bad because of the usual
// terminal backgrounds. Which leaves us only with TWO options.
>From d34822c663daee8031ea0c571a3ccf082c510851 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 8 Nov 2023 11:36:51 +0100
Subject: [PATCH 21/28] Address review comment
---
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 90ab5a4927efb39..73d3b9f195bdcea 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -45,7 +45,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
if (!Buff || Buff->getBufferSize() > MaxBufferSize)
return {};
- Lexer L = Lexer(FID, *Buff, SM, LangOpts);
+ Lexer L{FID, *Buff, SM, LangOpts};
L.SetKeepWhitespaceMode(true);
size_t NTokens = 0;
>From 47aa4a678203b95232709807bb7604f88e15e06e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 08:01:25 +0100
Subject: [PATCH 22/28] Highlight all requested lines in one go.
---
.../clang/Frontend/CodeSnippetHighlighter.h | 9 +-
clang/include/clang/Frontend/TextDiagnostic.h | 2 +-
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 100 +++++++-----------
clang/lib/Frontend/TextDiagnostic.cpp | 12 ++-
4 files changed, 49 insertions(+), 74 deletions(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index cb3c96f69293795..89cdb27bccb5743 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -34,10 +34,11 @@ class CodeSnippetHighlighter final {
/// Produce StyleRanges for the given line.
/// The returned vector contains non-overlapping style ranges. They are sorted
/// from beginning of the line to the end.
- llvm::SmallVector<StyleRange>
- highlightLine(unsigned LineNumber, const Preprocessor *PP,
- const LangOptions &LangOpts, FileID FID,
- const SourceManager &SM, const char *LineStart);
+ // llvm::SmallVector<StyleRange>
+ std::unique_ptr<llvm::SmallVector<StyleRange>[]>
+ highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
+ const Preprocessor *PP, const LangOptions &LangOpts,
+ FileID FID, const SourceManager &SM);
};
} // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index ecd5bb4a4f568dc..7d1cebabf4c15e8 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -106,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer {
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
FileID FID, const SourceManager &SM, unsigned LineNo,
- unsigned DisplayLineNo, const char *LineStart);
+ unsigned DisplayLineNo, ArrayRef<StyleRange> Styles);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 73d3b9f195bdcea..6d6958b10c05c46 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -28,27 +28,30 @@ static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
/// Maximum size of file we still highlight.
static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB.
-llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
- unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts,
- FileID FID, const SourceManager &SM, const char *LineStart) {
- std::chrono::steady_clock::time_point begin =
- std::chrono::steady_clock::now();
+std::unique_ptr<llvm::SmallVector<StyleRange>[]>
+CodeSnippetHighlighter::highlightLines(unsigned StartLineNumber,
+ unsigned EndLineNumber,
+ const Preprocessor *PP,
+ const LangOptions &LangOpts, FileID FID,
+ const SourceManager &SM) {
+ assert(StartLineNumber <= EndLineNumber);
+ auto SnippetRanges = std::make_unique<llvm::SmallVector<StyleRange>[]>(
+ EndLineNumber - StartLineNumber + 1);
if (!PP)
- return {};
+ return SnippetRanges;
// Might cause emission of another diagnostic.
if (PP->getIdentifierTable().getExternalIdentifierLookup())
- return {};
+ return SnippetRanges;
auto Buff = SM.getBufferOrNone(FID);
if (!Buff || Buff->getBufferSize() > MaxBufferSize)
- return {};
+ return SnippetRanges;
Lexer L{FID, *Buff, SM, LangOpts};
L.SetKeepWhitespaceMode(true);
- size_t NTokens = 0;
// Classify the given token and append it to the given vector.
auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec,
const Token &T, unsigned Start,
@@ -73,10 +76,9 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
}
};
- llvm::SmallVector<StyleRange> LineRanges;
+
bool Stop = false;
while (!Stop) {
- ++NTokens;
Token T;
Stop = L.LexFromRawLexer(T);
if (T.is(tok::unknown))
@@ -88,34 +90,33 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
continue;
bool Invalid = false;
- unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1;
- if (Invalid)
+ unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
+ if (Invalid || TokenEndLine < StartLineNumber)
continue;
- if (EndLine < LineNumber)
- continue;
- unsigned StartLine =
- SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1;
+ assert(TokenEndLine >= StartLineNumber);
+
+ unsigned TokenStartLine =
+ SM.getSpellingLineNumber(T.getLocation(), &Invalid);
if (Invalid)
continue;
- if (StartLine > LineNumber)
+ // If this happens, we're done.
+ if (TokenStartLine > EndLineNumber)
break;
- // Must have an intersection at this point
- assert(StartLine <= LineNumber && EndLine >= LineNumber);
-
unsigned StartCol =
SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
// Simple tokens.
- if (StartLine == EndLine) {
+ if (TokenStartLine == TokenEndLine) {
+ llvm::SmallVector<StyleRange> &LineRanges =
+ SnippetRanges[TokenStartLine - StartLineNumber];
appendStyle(LineRanges, T, StartCol, T.getLength());
continue;
}
- unsigned NumLines = EndLine - StartLine;
- assert(NumLines >= 1);
+ assert((TokenEndLine - TokenStartLine) >= 1);
// For tokens that span multiple lines (think multiline comments), we
// divide them into multiple StyleRanges.
@@ -125,23 +126,26 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
- unsigned L = 0;
+ unsigned L = TokenStartLine;
unsigned LineLength = 0;
for (unsigned I = 0; I <= Spelling.size(); ++I) {
// This line is done.
if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
- if (StartLine + L == LineNumber) {
- if (L == 0) // First line
+ llvm::SmallVector<StyleRange> &LineRanges =
+ SnippetRanges[L - StartLineNumber];
+
+ if (L == StartLineNumber) {
+ if (L == TokenStartLine) // First line
appendStyle(LineRanges, T, StartCol, LineLength);
- else if (L == NumLines) // Last line
+ else if (L == TokenEndLine) // Last line
appendStyle(LineRanges, T, 0, EndCol);
else
appendStyle(LineRanges, T, 0, LineLength);
-
- // We only do one line, so we're done.
- break;
}
+
++L;
+ if (L > EndLineNumber)
+ break;
LineLength = 0;
continue;
}
@@ -149,37 +153,5 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine(
}
}
-#if 0
- llvm::errs() << "--\nLine Style info: \n";
- //int I = 0;
- //for (std::vector<StyleRange> &Line : Lines) {
- //llvm::errs() << I << ": ";
- for (const auto &R : LineRanges) {
- llvm::errs() << "{" << R.Start << ", " << R.End << "}, ";
- }
- llvm::errs() << "\n";
-
- //++I;
- //}
-#endif
-
-#if 0
- std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
- llvm::errs() << "Lexed " << NTokens << " Tokens\n";
- llvm::errs() << "That took "
- << std::chrono::duration_cast<std::chrono::microseconds>(end -
- begin)
- .count()
- << " microseconds\n";
- llvm::errs() << "That took "
- << std::chrono::duration_cast<std::chrono::milliseconds>(end -
- begin)
- .count()
- << " milliseconds\n";
- llvm::errs()
- << "That took "
- << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
- << " seconds\n";
-#endif
- return LineRanges;
+ return SnippetRanges;
}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 1378b3a0812e598..8230979c62161aa 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1186,6 +1186,11 @@ void TextDiagnostic::emitSnippetAndCaret(
SmallVector<LineRange> LineRanges =
prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);
+ // Prepare source highlighting information for the lines we're about to emit.
+ std::unique_ptr<llvm::SmallVector<StyleRange>[]> SourceStyles =
+ SnippetHighlighter.highlightLines(Lines.first, Lines.second, PP, LangOpts,
+ FID, SM);
+
for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1;
++LineNo, ++DisplayLineNo) {
// Rewind from the current position to the start of the line.
@@ -1250,7 +1255,7 @@ void TextDiagnostic::emitSnippetAndCaret(
// Emit what we have computed.
emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo,
- DisplayLineNo, LineStart);
+ DisplayLineNo, SourceStyles[LineNo - Lines.first]);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1282,10 +1287,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth, FileID FID,
const SourceManager &SM, unsigned LineNo,
unsigned DisplayLineNo,
- const char *LineStart) {
- llvm::SmallVector<StyleRange> Styles = SnippetHighlighter.highlightLine(
- LineNo - 1, PP, LangOpts, FID, SM, LineStart);
-
+ ArrayRef<StyleRange> Styles) {
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
>From d1de6c9620c672d955a1be6e2884d87746a259b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 08:19:30 +0100
Subject: [PATCH 23/28] Remove a leftover comment
---
clang/include/clang/Frontend/CodeSnippetHighlighter.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index 89cdb27bccb5743..6aa4497182c84af 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -34,7 +34,6 @@ class CodeSnippetHighlighter final {
/// Produce StyleRanges for the given line.
/// The returned vector contains non-overlapping style ranges. They are sorted
/// from beginning of the line to the end.
- // llvm::SmallVector<StyleRange>
std::unique_ptr<llvm::SmallVector<StyleRange>[]>
highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
const Preprocessor *PP, const LangOptions &LangOpts,
>From f5208006229149a2b51cdf77f0b5e4313e4fd134 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 09:28:23 +0100
Subject: [PATCH 24/28] Remove CodeSnippetHighlighter again
It was a class with only one function, used in only one place. Just
merge it into TextDiagnostic.
---
.../include/clang/Basic/DiagnosticOptions.def | 3 +
clang/include/clang/Basic/DiagnosticOptions.h | 1 +
clang/include/clang/Driver/Options.td | 4 +
.../clang/Frontend/CodeSnippetHighlighter.h | 45 -----
clang/include/clang/Frontend/TextDiagnostic.h | 15 +-
clang/lib/Frontend/CMakeLists.txt | 1 -
clang/lib/Frontend/CodeSnippetHighlighter.cpp | 157 ------------------
clang/lib/Frontend/TextDiagnostic.cpp | 149 ++++++++++++++++-
8 files changed, 160 insertions(+), 215 deletions(-)
delete mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h
delete mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp
diff --git a/clang/include/clang/Basic/DiagnosticOptions.def b/clang/include/clang/Basic/DiagnosticOptions.def
index 6d0c1b14acc1207..553f6476f07b333 100644
--- a/clang/include/clang/Basic/DiagnosticOptions.def
+++ b/clang/include/clang/Basic/DiagnosticOptions.def
@@ -96,6 +96,9 @@ VALUE_DIAGOPT(ShowLineNumbers, 1, DefaultShowLineNumbers)
VALUE_DIAGOPT(TabStop, 32, DefaultTabStop) /// The distance between tab stops.
/// Column limit for formatting message diagnostics, or 0 if unused.
VALUE_DIAGOPT(MessageLength, 32, 0)
+/// Default maximum file size to highlight code snippets for, in bytes.
+VALUE_DIAGOPT(MaxHighlightFileSize, 32, DefaultMaxHighlightFileSize)
+
DIAGOPT(ShowSafeBufferUsageSuggestions, 1, 0)
diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h
index 099982c3bdd5a00..4be540b559a3ae4 100644
--- a/clang/include/clang/Basic/DiagnosticOptions.h
+++ b/clang/include/clang/Basic/DiagnosticOptions.h
@@ -87,6 +87,7 @@ class DiagnosticOptions : public RefCountedBase<DiagnosticOptions>{
DefaultSpellCheckingLimit = 50,
DefaultSnippetLineLimit = 16,
DefaultShowLineNumbers = 1,
+ DefaultMaxHighlightFileSize = 1024 * 1024
};
// Define simple diagnostic options (with no accessors).
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index dacc4442b338a29..534891a66d1904c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7014,6 +7014,10 @@ def fno_diagnostics_use_presumed_location : Flag<["-"], "fno-diagnostics-use-pre
def ftabstop : Separate<["-"], "ftabstop">, MetaVarName<"<N>">,
HelpText<"Set the tab stop distance.">,
MarshallingInfoInt<DiagnosticOpts<"TabStop">, "DiagnosticOptions::DefaultTabStop">;
+def fmax_highlight_file_size : Separate<["-"], "fmax-highlight-file-size">, MetaVarName<"<N>">,
+ HelpText<"Set the maximum file size (in bytes) for which code snippets are highlighted.">,
+ MarshallingInfoInt<DiagnosticOpts<"MaxHighlightFileSize">, "DiagnosticOptions::DefaultMaxHighlightFileSize">;
+
def ferror_limit : Separate<["-"], "ferror-limit">, MetaVarName<"<N>">,
HelpText<"Set the maximum number of errors to emit before stopping (0 = no limit).">,
MarshallingInfoInt<DiagnosticOpts<"ErrorLimit">>;
diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
deleted file mode 100644
index 6aa4497182c84af..000000000000000
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
-#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
-
-#include "clang/Basic/LangOptions.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace clang {
-
-struct StyleRange {
- unsigned Start;
- unsigned End;
- enum llvm::raw_ostream::Colors Color;
- StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
- : Start(S), End(E), Color(C){};
-};
-
-class Preprocessor;
-class FileID;
-class SourceManager;
-
-class CodeSnippetHighlighter final {
-public:
- CodeSnippetHighlighter() = default;
-
- /// Produce StyleRanges for the given line.
- /// The returned vector contains non-overlapping style ranges. They are sorted
- /// from beginning of the line to the end.
- std::unique_ptr<llvm::SmallVector<StyleRange>[]>
- highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
- const Preprocessor *PP, const LangOptions &LangOpts,
- FileID FID, const SourceManager &SM);
-};
-
-} // namespace clang
-
-#endif
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 7d1cebabf4c15e8..05ec753289d14fd 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -15,8 +15,8 @@
#ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
-#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Frontend/DiagnosticRenderer.h"
+#include "llvm/Support/raw_ostream.h"
namespace clang {
/// Class to encapsulate the logic for formatting and printing a textual
@@ -34,7 +34,6 @@ namespace clang {
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
const Preprocessor *PP;
- CodeSnippetHighlighter SnippetHighlighter;
public:
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
@@ -42,6 +41,14 @@ class TextDiagnostic : public DiagnosticRenderer {
~TextDiagnostic() override;
+ struct StyleRange {
+ unsigned Start;
+ unsigned End;
+ enum llvm::raw_ostream::Colors Color;
+ StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
+ : Start(S), End(E), Color(C){};
+ };
+
/// Print the diagonstic level to a raw_ostream.
///
/// This is a static helper that handles colorizing the level and formatting
@@ -105,8 +112,8 @@ class TextDiagnostic : public DiagnosticRenderer {
ArrayRef<FixItHint> Hints);
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
- FileID FID, const SourceManager &SM, unsigned LineNo,
- unsigned DisplayLineNo, ArrayRef<StyleRange> Styles);
+ unsigned LineNo, unsigned DisplayLineNo,
+ ArrayRef<StyleRange> Styles);
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index f3547f771593093..1e5f0a859dfd568 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -42,7 +42,6 @@ add_clang_library(clangFrontend
TextDiagnosticPrinter.cpp
VerifyDiagnosticConsumer.cpp
InterfaceStubFunctionsConsumer.cpp
- CodeSnippetHighlighter.cpp
DEPENDS
ClangDriverOptions
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
deleted file mode 100644
index 6d6958b10c05c46..000000000000000
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-//===-- CodeSnippetHighlighter.cpp - Code snippet highlighting --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/Frontend/CodeSnippetHighlighter.h"
-#include "clang/Basic/CharInfo.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Lex/Lexer.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/PreprocessorOptions.h"
-#include "llvm/Support/raw_ostream.h"
-#include <chrono>
-
-using namespace clang;
-
-// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
-// is already taken for 'note'. Green is already used to underline
-// source ranges. White and black are bad because of the usual
-// terminal backgrounds. Which leaves us only with TWO options.
-static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
-static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
-static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
-/// Maximum size of file we still highlight.
-static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB.
-
-std::unique_ptr<llvm::SmallVector<StyleRange>[]>
-CodeSnippetHighlighter::highlightLines(unsigned StartLineNumber,
- unsigned EndLineNumber,
- const Preprocessor *PP,
- const LangOptions &LangOpts, FileID FID,
- const SourceManager &SM) {
- assert(StartLineNumber <= EndLineNumber);
- auto SnippetRanges = std::make_unique<llvm::SmallVector<StyleRange>[]>(
- EndLineNumber - StartLineNumber + 1);
-
- if (!PP)
- return SnippetRanges;
-
- // Might cause emission of another diagnostic.
- if (PP->getIdentifierTable().getExternalIdentifierLookup())
- return SnippetRanges;
-
- auto Buff = SM.getBufferOrNone(FID);
- if (!Buff || Buff->getBufferSize() > MaxBufferSize)
- return SnippetRanges;
-
- Lexer L{FID, *Buff, SM, LangOpts};
- L.SetKeepWhitespaceMode(true);
-
- // Classify the given token and append it to the given vector.
- auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec,
- const Token &T, unsigned Start,
- unsigned Length) -> void {
- if (T.is(tok::raw_identifier)) {
- StringRef RawIdent = T.getRawIdentifier();
- // Special case true/false/nullptr literals, since they will otherwise be
- // treated as keywords.
- if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
- Vec.emplace_back(Start, Start + Length, LiteralColor);
- } else {
- const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
- assert(II);
- if (II->isKeyword(LangOpts))
- Vec.emplace_back(Start, Start + Length, KeywordColor);
- }
- } else if (tok::isLiteral(T.getKind())) {
- Vec.emplace_back(Start, Start + Length, LiteralColor);
- } else {
- assert(T.is(tok::comment));
- Vec.emplace_back(Start, Start + Length, CommentColor);
- }
- };
-
-
- bool Stop = false;
- while (!Stop) {
- Token T;
- Stop = L.LexFromRawLexer(T);
- if (T.is(tok::unknown))
- continue;
-
- // We are only interested in identifiers, literals and comments.
- if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
- !tok::isLiteral(T.getKind()))
- continue;
-
- bool Invalid = false;
- unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
- if (Invalid || TokenEndLine < StartLineNumber)
- continue;
-
- assert(TokenEndLine >= StartLineNumber);
-
- unsigned TokenStartLine =
- SM.getSpellingLineNumber(T.getLocation(), &Invalid);
- if (Invalid)
- continue;
- // If this happens, we're done.
- if (TokenStartLine > EndLineNumber)
- break;
-
- unsigned StartCol =
- SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
- if (Invalid)
- continue;
-
- // Simple tokens.
- if (TokenStartLine == TokenEndLine) {
- llvm::SmallVector<StyleRange> &LineRanges =
- SnippetRanges[TokenStartLine - StartLineNumber];
- appendStyle(LineRanges, T, StartCol, T.getLength());
- continue;
- }
- assert((TokenEndLine - TokenStartLine) >= 1);
-
- // For tokens that span multiple lines (think multiline comments), we
- // divide them into multiple StyleRanges.
- unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
- if (Invalid)
- continue;
-
- std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
-
- unsigned L = TokenStartLine;
- unsigned LineLength = 0;
- for (unsigned I = 0; I <= Spelling.size(); ++I) {
- // This line is done.
- if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
- llvm::SmallVector<StyleRange> &LineRanges =
- SnippetRanges[L - StartLineNumber];
-
- if (L == StartLineNumber) {
- if (L == TokenStartLine) // First line
- appendStyle(LineRanges, T, StartCol, LineLength);
- else if (L == TokenEndLine) // Last line
- appendStyle(LineRanges, T, 0, EndCol);
- else
- appendStyle(LineRanges, T, 0, LineLength);
- }
-
- ++L;
- if (L > EndLineNumber)
- break;
- LineLength = 0;
- continue;
- }
- ++LineLength;
- }
- }
-
- return SnippetRanges;
-}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 8230979c62161aa..3b012c90ec89665 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -11,7 +11,6 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
-#include "clang/Frontend/CodeSnippetHighlighter.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
@@ -43,6 +42,16 @@ static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
static const enum raw_ostream::Colors savedColor =
raw_ostream::SAVEDCOLOR;
+// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
+// is already taken for 'note'. Green is already used to underline
+// source ranges. White and black are bad because of the usual
+// terminal backgrounds. Which leaves us only with TWO options.
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
+/// Maximum size of file we still highlight.
+static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB.
+
/// Add highlights to differences in template strings.
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
bool &Normal, bool Bold) {
@@ -1114,6 +1123,132 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
return LineRanges;
}
+std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
+highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
+ const Preprocessor *PP, const LangOptions &LangOpts, FileID FID,
+ const SourceManager &SM) {
+ assert(StartLineNumber <= EndLineNumber);
+ auto SnippetRanges =
+ std::make_unique<llvm::SmallVector<TextDiagnostic::StyleRange>[]>(
+ EndLineNumber - StartLineNumber + 1);
+
+ if (!PP)
+ return SnippetRanges;
+
+ // Might cause emission of another diagnostic.
+ if (PP->getIdentifierTable().getExternalIdentifierLookup())
+ return SnippetRanges;
+
+ auto Buff = SM.getBufferOrNone(FID);
+ if (!Buff || Buff->getBufferSize() > MaxBufferSize)
+ return SnippetRanges;
+
+ Lexer L{FID, *Buff, SM, LangOpts};
+ L.SetKeepWhitespaceMode(true);
+
+ // Classify the given token and append it to the given vector.
+ auto appendStyle =
+ [PP, &LangOpts](llvm::SmallVector<TextDiagnostic::StyleRange> &Vec,
+ const Token &T, unsigned Start, unsigned Length) -> void {
+ if (T.is(tok::raw_identifier)) {
+ StringRef RawIdent = T.getRawIdentifier();
+ // Special case true/false/nullptr literals, since they will otherwise be
+ // treated as keywords.
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
+ } else {
+ const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+ assert(II);
+ if (II->isKeyword(LangOpts))
+ Vec.emplace_back(Start, Start + Length, KeywordColor);
+ }
+ } else if (tok::isLiteral(T.getKind())) {
+ Vec.emplace_back(Start, Start + Length, LiteralColor);
+ } else {
+ assert(T.is(tok::comment));
+ Vec.emplace_back(Start, Start + Length, CommentColor);
+ }
+ };
+
+ bool Stop = false;
+ while (!Stop) {
+ Token T;
+ Stop = L.LexFromRawLexer(T);
+ if (T.is(tok::unknown))
+ continue;
+
+ // We are only interested in identifiers, literals and comments.
+ if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
+ !tok::isLiteral(T.getKind()))
+ continue;
+
+ bool Invalid = false;
+ unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
+ if (Invalid || TokenEndLine < StartLineNumber)
+ continue;
+
+ assert(TokenEndLine >= StartLineNumber);
+
+ unsigned TokenStartLine =
+ SM.getSpellingLineNumber(T.getLocation(), &Invalid);
+ if (Invalid)
+ continue;
+ // If this happens, we're done.
+ if (TokenStartLine > EndLineNumber)
+ break;
+
+ unsigned StartCol =
+ SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ // Simple tokens.
+ if (TokenStartLine == TokenEndLine) {
+ llvm::SmallVector<TextDiagnostic::StyleRange> &LineRanges =
+ SnippetRanges[TokenStartLine - StartLineNumber];
+ appendStyle(LineRanges, T, StartCol, T.getLength());
+ continue;
+ }
+ assert((TokenEndLine - TokenStartLine) >= 1);
+
+ // For tokens that span multiple lines (think multiline comments), we
+ // divide them into multiple StyleRanges.
+ unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
+ if (Invalid)
+ continue;
+
+ std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
+
+ unsigned L = TokenStartLine;
+ unsigned LineLength = 0;
+ for (unsigned I = 0; I <= Spelling.size(); ++I) {
+ // This line is done.
+ if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
+ llvm::SmallVector<TextDiagnostic::StyleRange> &LineRanges =
+ SnippetRanges[L - StartLineNumber];
+
+ if (L == StartLineNumber) {
+ if (L == TokenStartLine) // First line
+ appendStyle(LineRanges, T, StartCol, LineLength);
+ else if (L == TokenEndLine) // Last line
+ appendStyle(LineRanges, T, 0, EndCol);
+ else
+ appendStyle(LineRanges, T, 0, LineLength);
+ }
+
+ ++L;
+ if (L > EndLineNumber)
+ break;
+ LineLength = 0;
+ continue;
+ }
+ ++LineLength;
+ }
+ }
+
+ return SnippetRanges;
+}
+
/// Emit a code snippet and caret line.
///
/// This routine emits a single line's code snippet and caret line..
@@ -1188,8 +1323,7 @@ void TextDiagnostic::emitSnippetAndCaret(
// Prepare source highlighting information for the lines we're about to emit.
std::unique_ptr<llvm::SmallVector<StyleRange>[]> SourceStyles =
- SnippetHighlighter.highlightLines(Lines.first, Lines.second, PP, LangOpts,
- FID, SM);
+ highlightLines(Lines.first, Lines.second, PP, LangOpts, FID, SM);
for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1;
++LineNo, ++DisplayLineNo) {
@@ -1254,8 +1388,8 @@ void TextDiagnostic::emitSnippetAndCaret(
}
// Emit what we have computed.
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo,
- DisplayLineNo, SourceStyles[LineNo - Lines.first]);
+ emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo,
+ SourceStyles[LineNo - Lines.first]);
if (!CaretLine.empty()) {
indentForLineNumbers();
@@ -1284,9 +1418,8 @@ void TextDiagnostic::emitSnippetAndCaret(
}
void TextDiagnostic::emitSnippet(StringRef SourceLine,
- unsigned MaxLineNoDisplayWidth, FileID FID,
- const SourceManager &SM, unsigned LineNo,
- unsigned DisplayLineNo,
+ unsigned MaxLineNoDisplayWidth,
+ unsigned LineNo, unsigned DisplayLineNo,
ArrayRef<StyleRange> Styles) {
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
>From f4ca6429917e2205e4ae98f88bed72f9c62c55fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 19:07:13 +0100
Subject: [PATCH 25/28] Whitespace cleanup
---
clang/include/clang/Frontend/TextDiagnostic.h | 1 +
clang/include/clang/Lex/Preprocessor.h | 1 -
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 05ec753289d14fd..a2fe8ae995423b9 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -19,6 +19,7 @@
#include "llvm/Support/raw_ostream.h"
namespace clang {
+
/// Class to encapsulate the logic for formatting and printing a textual
/// diagnostic message.
///
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index b1c2807e35a3149..4ec21a8b6be2c85 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -128,7 +128,6 @@ enum MacroUse {
class Preprocessor {
friend class VAOptDefinitionContext;
friend class VariadicMacroScopeGuard;
- friend class CodeSnippetHighlighter;
llvm::unique_function<void(const clang::Token &)> OnToken;
std::shared_ptr<PreprocessorOptions> PPOpts;
>From eb60d58060a86fdf5b5bbb0581ad203937e94c4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 19:14:09 +0100
Subject: [PATCH 26/28] Respect max-highlight-file-size option
---
clang/include/clang/Driver/Options.td | 8 ++++----
clang/lib/Driver/ToolChains/Clang.cpp | 1 +
clang/lib/Frontend/TextDiagnostic.cpp | 10 +++++-----
3 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 534891a66d1904c..b9e8dfef2a98be6 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1785,6 +1785,10 @@ def : Flag<["-"], "fdiagnostics-color">, Group<f_Group>,
def : Flag<["-"], "fno-diagnostics-color">, Group<f_Group>,
Visibility<[ClangOption, CLOption, DXCOption]>, Alias<fno_color_diagnostics>;
def fdiagnostics_color_EQ : Joined<["-"], "fdiagnostics-color=">, Group<f_Group>;
+def fmax_highlight_file_size_EQ : Joined<["-"], "fmax-highlight-file-size=">,
+ Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Maximum file size (in bytes) to still highlight code snippets from.">,
+ MarshallingInfoInt<DiagnosticOpts<"MaxHighlightFileSize">, "DiagnosticOptions::DefaultMaxHighlightFileSize">;
def fansi_escape_codes : Flag<["-"], "fansi-escape-codes">, Group<f_Group>,
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
HelpText<"Use ANSI escape codes for diagnostics">,
@@ -7014,10 +7018,6 @@ def fno_diagnostics_use_presumed_location : Flag<["-"], "fno-diagnostics-use-pre
def ftabstop : Separate<["-"], "ftabstop">, MetaVarName<"<N>">,
HelpText<"Set the tab stop distance.">,
MarshallingInfoInt<DiagnosticOpts<"TabStop">, "DiagnosticOptions::DefaultTabStop">;
-def fmax_highlight_file_size : Separate<["-"], "fmax-highlight-file-size">, MetaVarName<"<N>">,
- HelpText<"Set the tab stop distance.">,
- MarshallingInfoInt<DiagnosticOpts<"MaxHighlightFileSize">, "DiagnosticOptions::DefaultMaxHighlightFileSize">;
-
def ferror_limit : Separate<["-"], "ferror-limit">, MetaVarName<"<N>">,
HelpText<"Set the maximum number of errors to emit before stopping (0 = no limit).">,
MarshallingInfoInt<DiagnosticOpts<"ErrorLimit">>;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3b98c7ae6e6ec66..f2dd8b9dea08be6 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7181,6 +7181,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_dI);
Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens_EQ);
+ Args.AddLastArg(CmdArgs, options::OPT_fmax_highlight_file_size_EQ);
// Handle serialized diagnostics.
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 3b012c90ec89665..82a9bdf1511f643 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -49,8 +49,6 @@ static const enum raw_ostream::Colors savedColor =
static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
-/// Maximum size of file we still highlight.
-static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB.
/// Add highlights to differences in template strings.
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
@@ -1125,7 +1123,8 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
- const Preprocessor *PP, const LangOptions &LangOpts, FileID FID,
+ const Preprocessor *PP, const LangOptions &LangOpts,
+ uint32_t MaxHighlightFileSize, FileID FID,
const SourceManager &SM) {
assert(StartLineNumber <= EndLineNumber);
auto SnippetRanges =
@@ -1140,7 +1139,7 @@ highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
return SnippetRanges;
auto Buff = SM.getBufferOrNone(FID);
- if (!Buff || Buff->getBufferSize() > MaxBufferSize)
+ if (!Buff || Buff->getBufferSize() > MaxHighlightFileSize)
return SnippetRanges;
Lexer L{FID, *Buff, SM, LangOpts};
@@ -1323,7 +1322,8 @@ void TextDiagnostic::emitSnippetAndCaret(
// Prepare source highlighting information for the lines we're about to emit.
std::unique_ptr<llvm::SmallVector<StyleRange>[]> SourceStyles =
- highlightLines(Lines.first, Lines.second, PP, LangOpts, FID, SM);
+ highlightLines(Lines.first, Lines.second, PP, LangOpts,
+ DiagOpts->MaxHighlightFileSize, FID, SM);
for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1;
++LineNo, ++DisplayLineNo) {
>From fbc9d939764df67a5faeeb615628dc67f2b65667 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 12 Nov 2023 19:39:05 +0100
Subject: [PATCH 27/28] Fix multiline token in the middle of the line range
---
clang/lib/Frontend/TextDiagnostic.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 82a9bdf1511f643..8a9e860a4b1ae04 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1226,7 +1226,7 @@ highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
llvm::SmallVector<TextDiagnostic::StyleRange> &LineRanges =
SnippetRanges[L - StartLineNumber];
- if (L == StartLineNumber) {
+ if (L >= StartLineNumber) {
if (L == TokenStartLine) // First line
appendStyle(LineRanges, T, StartCol, LineLength);
else if (L == TokenEndLine) // Last line
>From 39495eda812ad997ea0a481eac0157a086eee84a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Mon, 13 Nov 2023 09:26:11 +0100
Subject: [PATCH 28/28] Add a doc comment to highlightLines()
---
clang/lib/Frontend/TextDiagnostic.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 8a9e860a4b1ae04..bd515215e717614 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1121,6 +1121,13 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
return LineRanges;
}
+/// Creates syntax highlighting information in form of StyleRanges.
+///
+/// The returned array always has exactly
+/// (\p EndLineNumber - \p StartLineNumber + 1) elements. Each SmallVector in it
+/// corresponds to syntax highlighting information in one line. In each line,
+/// the StyleRanges are non-overlapping and sorted from start to end of the
+/// line.
std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
highlightLines(unsigned StartLineNumber, unsigned EndLineNumber,
const Preprocessor *PP, const LangOptions &LangOpts,
More information about the cfe-commits
mailing list