[clang] [clang][Diagnostics] Highlight code snippets (PR #66514)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 21 02:02:12 PDT 2023


Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>,
Timm Bäder <tbaeder at redhat.com>
Message-ID:
In-Reply-To: <llvm/llvm-project/pull/66514/clang at github.com>


https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514

>From 4323e6952a577a81a5fdc51ce9571f28d7ccb6f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 15 Sep 2023 15:51:39 +0200
Subject: [PATCH 1/5] [clang][Diagnostics] Highlight code snippets

Add some primitive syntax highlighting to our code snippet output.
---
 .../clang/Frontend/CodeSnippetHighlighter.h   |  46 +++++++
 clang/include/clang/Frontend/TextDiagnostic.h |   2 +
 clang/lib/Frontend/CMakeLists.txt             |   1 +
 clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++
 clang/lib/Frontend/TextDiagnostic.cpp         |  26 ++++
 5 files changed, 195 insertions(+)
 create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h
 create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp

diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
new file mode 100644
index 000000000000000..776954b59e2e1a8
--- /dev/null
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -0,0 +1,46 @@
+//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H
+
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace clang {
+
+struct StyleRange {
+  unsigned Start;
+  unsigned End;
+  const enum llvm::raw_ostream::Colors c;
+};
+
+class CodeSnippetHighlighter final {
+public:
+  CodeSnippetHighlighter() = default;
+
+  /// Produce StyleRanges for the given line.
+  /// The returned vector contains non-overlapping style ranges. They are sorted
+  /// from beginning of the line to the end.
+  std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+                                        const LangOptions &LangOpts);
+
+private:
+  bool Initialized = false;
+  /// Fills Keywords and Literals.
+  void ensureTokenData();
+
+  llvm::SmallSet<StringRef, 12> Keywords;
+  llvm::SmallSet<StringRef, 12> Literals;
+};
+
+} // namespace clang
+
+#endif
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 7eb0ab0cdc9bca8..59fd4d4f9408d48 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
 #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
 
+#include "clang/Frontend/CodeSnippetHighlighter.h"
 #include "clang/Frontend/DiagnosticRenderer.h"
 
 namespace clang {
@@ -33,6 +34,7 @@ namespace clang {
 /// printing coming out of libclang.
 class TextDiagnostic : public DiagnosticRenderer {
   raw_ostream &OS;
+  CodeSnippetHighlighter SnippetHighlighter;
 
 public:
   TextDiagnostic(raw_ostream &OS,
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index 1e5f0a859dfd568..f3547f771593093 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -42,6 +42,7 @@ add_clang_library(clangFrontend
   TextDiagnosticPrinter.cpp
   VerifyDiagnosticConsumer.cpp
   InterfaceStubFunctionsConsumer.cpp
+  CodeSnippetHighlighter.cpp
 
   DEPENDS
   ClangDriverOptions
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
new file mode 100644
index 000000000000000..829a533ad2692e5
--- /dev/null
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -0,0 +1,120 @@
+
+#include "clang/Frontend/CodeSnippetHighlighter.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+
+void CodeSnippetHighlighter::ensureTokenData() {
+  if (Initialized)
+    return;
+
+  // List of keywords, literals and types we want to highlight.
+  // These are best-effort, as is everything we do wrt. highlighting.
+  Keywords.insert("_Static_assert");
+  Keywords.insert("auto");
+  Keywords.insert("concept");
+  Keywords.insert("const");
+  Keywords.insert("consteval");
+  Keywords.insert("constexpr");
+  Keywords.insert("delete");
+  Keywords.insert("do");
+  Keywords.insert("else");
+  Keywords.insert("final");
+  Keywords.insert("for");
+  Keywords.insert("if");
+  Keywords.insert("mutable");
+  Keywords.insert("namespace");
+  Keywords.insert("new");
+  Keywords.insert("private");
+  Keywords.insert("public");
+  Keywords.insert("requires");
+  Keywords.insert("return");
+  Keywords.insert("static");
+  Keywords.insert("static_assert");
+  Keywords.insert("using");
+  Keywords.insert("void");
+  Keywords.insert("volatile");
+  Keywords.insert("while");
+
+  // Builtin types we highlight
+  Keywords.insert("void");
+  Keywords.insert("char");
+  Keywords.insert("short");
+  Keywords.insert("int");
+  Keywords.insert("unsigned");
+  Keywords.insert("long");
+  Keywords.insert("float");
+  Keywords.insert("double");
+
+  Literals.insert("true");
+  Literals.insert("false");
+  Literals.insert("nullptr");
+
+  Initialized = true;
+}
+
+static SourceManager createTempSourceManager() {
+  FileSystemOptions FileOpts;
+  FileManager FileMgr(FileOpts);
+  llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
+  llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
+  DiagnosticsEngine diags(DiagIDs, DiagOpts);
+  return SourceManager(diags, FileMgr);
+}
+
+static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
+                             const LangOptions &LangOpts) {
+  return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
+}
+
+std::vector<StyleRange>
+CodeSnippetHighlighter::highlightLine(StringRef SourceLine,
+                                      const LangOptions &LangOpts) {
+  ensureTokenData();
+
+  constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
+  constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
+  constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
+
+  const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
+  SourceManager FakeSM = createTempSourceManager();
+  Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
+  L.SetKeepWhitespaceMode(true);
+
+  std::vector<StyleRange> Styles;
+  bool Stop = false;
+  while (!Stop) {
+    Token tok;
+    Stop = L.LexFromRawLexer(tok);
+    if (tok.is(tok::unknown))
+      continue;
+
+    bool Invalid;
+    unsigned Start =
+        FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1;
+    if (Invalid)
+      continue;
+
+    if (tok.is(tok::raw_identifier)) {
+      // Almost everything we lex is an identifier, since we use a raw lexer.
+      // Some should be highlightes as literals, others as keywords.
+      if (Keywords.contains(tok.getRawIdentifier()))
+        Styles.push_back(
+            StyleRange{Start, Start + tok.getLength(), KeywordColor});
+      else if (Literals.contains(tok.getRawIdentifier()))
+        Styles.push_back(
+            StyleRange{Start, Start + tok.getLength(), LiteralColor});
+    } else if (tok::isLiteral(tok.getKind())) {
+      Styles.push_back(
+          StyleRange{Start, Start + tok.getLength(), LiteralColor});
+    } else if (tok.is(tok::comment)) {
+      Styles.push_back(
+          StyleRange{Start, Start + tok.getLength(), CommentColor});
+    }
+  }
+
+  return Styles;
+}
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index eaa6e8d29a1dece..8bd13aa72b13235 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -11,6 +11,7 @@
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CodeSnippetHighlighter.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1279,6 +1280,9 @@ void TextDiagnostic::emitSnippetAndCaret(
 void TextDiagnostic::emitSnippet(StringRef SourceLine,
                                  unsigned MaxLineNoDisplayWidth,
                                  unsigned LineNo) {
+  std::vector<StyleRange> Styles =
+      SnippetHighlighter.highlightLine(SourceLine, LangOpts);
+
   // Emit line number.
   if (MaxLineNoDisplayWidth > 0) {
     unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
@@ -1288,11 +1292,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
 
   // Print the source line one character at a time.
   bool PrintReversed = false;
+  bool HighlightingEnabled = DiagOpts->ShowColors;
   size_t I = 0;
   while (I < SourceLine.size()) {
     auto [Str, WasPrintable] =
         printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop);
 
+    // Just stop highlighting anything for this line if we found a non-printable
+    // character.
+    if (!WasPrintable)
+      HighlightingEnabled = false;
+
+    // FIXME: I hope we can do this in some nicer way.
+    if (HighlightingEnabled) {
+      std::optional<enum raw_ostream::Colors> H;
+      for (auto &P : Styles) {
+        if (P.Start < I && P.End >= I) {
+          H = P.c;
+          break;
+        }
+      }
+
+      if (H) {
+        OS.changeColor(*H, false);
+      } else
+        OS.resetColor();
+    }
+
     // Toggle inverted colors on or off for this character.
     if (DiagOpts->ShowColors) {
       if (WasPrintable == PrintReversed) {

>From d473e6c037a09199df3a12bae56209c8e4835ec7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 20 Sep 2023 15:28:10 +0200
Subject: [PATCH 2/5] Get identifier table from Preprocessor

---
 .../clang/Frontend/CodeSnippetHighlighter.h   |  11 +-
 clang/include/clang/Frontend/TextDiagnostic.h |   7 +-
 clang/lib/Frontend/CodeSnippetHighlighter.cpp | 101 +++++-------------
 clang/lib/Frontend/TextDiagnostic.cpp         |   8 +-
 clang/lib/Frontend/TextDiagnosticPrinter.cpp  |   2 +-
 5 files changed, 39 insertions(+), 90 deletions(-)

diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index 776954b59e2e1a8..ec03375221f9ffc 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -22,6 +22,8 @@ struct StyleRange {
   const enum llvm::raw_ostream::Colors c;
 };
 
+class Preprocessor;
+
 class CodeSnippetHighlighter final {
 public:
   CodeSnippetHighlighter() = default;
@@ -30,15 +32,8 @@ class CodeSnippetHighlighter final {
   /// The returned vector contains non-overlapping style ranges. They are sorted
   /// from beginning of the line to the end.
   std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+                                        const Preprocessor *PP,
                                         const LangOptions &LangOpts);
-
-private:
-  bool Initialized = false;
-  /// Fills Keywords and Literals.
-  void ensureTokenData();
-
-  llvm::SmallSet<StringRef, 12> Keywords;
-  llvm::SmallSet<StringRef, 12> Literals;
 };
 
 } // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 59fd4d4f9408d48..8cdb9b141a8a4af 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -19,7 +19,6 @@
 #include "clang/Frontend/DiagnosticRenderer.h"
 
 namespace clang {
-
 /// Class to encapsulate the logic for formatting and printing a textual
 /// diagnostic message.
 ///
@@ -34,12 +33,12 @@ namespace clang {
 /// printing coming out of libclang.
 class TextDiagnostic : public DiagnosticRenderer {
   raw_ostream &OS;
+  const Preprocessor *PP;
   CodeSnippetHighlighter SnippetHighlighter;
 
 public:
-  TextDiagnostic(raw_ostream &OS,
-                 const LangOptions &LangOpts,
-                 DiagnosticOptions *DiagOpts);
+  TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+                 const Preprocessor *PP, DiagnosticOptions *DiagOpts);
 
   ~TextDiagnostic() override;
 
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 829a533ad2692e5..63b3707fbb7ef83 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -3,59 +3,12 @@
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace clang;
 
-void CodeSnippetHighlighter::ensureTokenData() {
-  if (Initialized)
-    return;
-
-  // List of keywords, literals and types we want to highlight.
-  // These are best-effort, as is everything we do wrt. highlighting.
-  Keywords.insert("_Static_assert");
-  Keywords.insert("auto");
-  Keywords.insert("concept");
-  Keywords.insert("const");
-  Keywords.insert("consteval");
-  Keywords.insert("constexpr");
-  Keywords.insert("delete");
-  Keywords.insert("do");
-  Keywords.insert("else");
-  Keywords.insert("final");
-  Keywords.insert("for");
-  Keywords.insert("if");
-  Keywords.insert("mutable");
-  Keywords.insert("namespace");
-  Keywords.insert("new");
-  Keywords.insert("private");
-  Keywords.insert("public");
-  Keywords.insert("requires");
-  Keywords.insert("return");
-  Keywords.insert("static");
-  Keywords.insert("static_assert");
-  Keywords.insert("using");
-  Keywords.insert("void");
-  Keywords.insert("volatile");
-  Keywords.insert("while");
-
-  // Builtin types we highlight
-  Keywords.insert("void");
-  Keywords.insert("char");
-  Keywords.insert("short");
-  Keywords.insert("int");
-  Keywords.insert("unsigned");
-  Keywords.insert("long");
-  Keywords.insert("float");
-  Keywords.insert("double");
-
-  Literals.insert("true");
-  Literals.insert("false");
-  Literals.insert("nullptr");
-
-  Initialized = true;
-}
-
 static SourceManager createTempSourceManager() {
   FileSystemOptions FileOpts;
   FileManager FileMgr(FileOpts);
@@ -70,49 +23,51 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
   return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
 }
 
-std::vector<StyleRange>
-CodeSnippetHighlighter::highlightLine(StringRef SourceLine,
-                                      const LangOptions &LangOpts) {
-  ensureTokenData();
-
+std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
+    StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
   constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
   constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
   constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
 
-  const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
   SourceManager FakeSM = createTempSourceManager();
+  const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
   Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
   L.SetKeepWhitespaceMode(true);
 
   std::vector<StyleRange> Styles;
   bool Stop = false;
   while (!Stop) {
-    Token tok;
-    Stop = L.LexFromRawLexer(tok);
-    if (tok.is(tok::unknown))
+    Token T;
+    Stop = L.LexFromRawLexer(T);
+    if (T.is(tok::unknown))
       continue;
 
     bool Invalid;
     unsigned Start =
-        FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1;
+        FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
     if (Invalid)
       continue;
 
-    if (tok.is(tok::raw_identifier)) {
-      // Almost everything we lex is an identifier, since we use a raw lexer.
-      // Some should be highlightes as literals, others as keywords.
-      if (Keywords.contains(tok.getRawIdentifier()))
-        Styles.push_back(
-            StyleRange{Start, Start + tok.getLength(), KeywordColor});
-      else if (Literals.contains(tok.getRawIdentifier()))
+    if (T.is(tok::raw_identifier)) {
+      StringRef RawIdent = T.getRawIdentifier();
+      // Special case true/false/nullptr literals, since they will otherwise be
+      // treated as keywords.
+      if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
         Styles.push_back(
-            StyleRange{Start, Start + tok.getLength(), LiteralColor});
-    } else if (tok::isLiteral(tok.getKind())) {
-      Styles.push_back(
-          StyleRange{Start, Start + tok.getLength(), LiteralColor});
-    } else if (tok.is(tok::comment)) {
-      Styles.push_back(
-          StyleRange{Start, Start + tok.getLength(), CommentColor});
+            StyleRange{Start, Start + T.getLength(), LiteralColor});
+      } else {
+        const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+        assert(II);
+
+        if (II->isKeyword(LangOpts)) {
+          Styles.push_back(
+              StyleRange{Start, Start + T.getLength(), KeywordColor});
+        }
+      }
+    } else if (tok::isLiteral(T.getKind())) {
+      Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor});
+    } else if (T.is(tok::comment)) {
+      Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor});
     }
   }
 
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 8bd13aa72b13235..e5de17794323383 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -646,10 +646,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
   return Wrapped;
 }
 
-TextDiagnostic::TextDiagnostic(raw_ostream &OS,
-                               const LangOptions &LangOpts,
+TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+                               const Preprocessor *PP,
                                DiagnosticOptions *DiagOpts)
-  : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
+    : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
 
 TextDiagnostic::~TextDiagnostic() {}
 
@@ -1281,7 +1281,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
                                  unsigned MaxLineNoDisplayWidth,
                                  unsigned LineNo) {
   std::vector<StyleRange> Styles =
-      SnippetHighlighter.highlightLine(SourceLine, LangOpts);
+      SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts);
 
   // Emit line number.
   if (MaxLineNoDisplayWidth > 0) {
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 0ff5376098ffe8d..3bc3935078baada 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
 void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
                                             const Preprocessor *PP) {
   // Build the TextDiagnostic utility.
-  TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts));
+  TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts));
 }
 
 void TextDiagnosticPrinter::EndSourceFile() {

>From c7cc185eb2309b69da58a5697e8f80ef724c5c82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 20 Sep 2023 17:24:42 +0200
Subject: [PATCH 3/5] Move the PP parameter to the end of the TextDiagnostic
 ctor

---
 clang/include/clang/Frontend/TextDiagnostic.h | 2 +-
 clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 ++
 clang/lib/Frontend/TextDiagnostic.cpp         | 3 +--
 clang/lib/Frontend/TextDiagnosticPrinter.cpp  | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 8cdb9b141a8a4af..43c39ff96a2d1ce 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -38,7 +38,7 @@ class TextDiagnostic : public DiagnosticRenderer {
 
 public:
   TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
-                 const Preprocessor *PP, DiagnosticOptions *DiagOpts);
+                 DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr);
 
   ~TextDiagnostic() override;
 
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 63b3707fbb7ef83..32bd61f3746023c 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -25,6 +25,8 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
 
 std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
     StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
+  if (!PP)
+    return {};
   constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
   constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
   constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index e5de17794323383..8675d3b08e86b71 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -647,8 +647,7 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
 }
 
 TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
-                               const Preprocessor *PP,
-                               DiagnosticOptions *DiagOpts)
+                               DiagnosticOptions *DiagOpts, const Preprocessor *PP)
     : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
 
 TextDiagnostic::~TextDiagnostic() {}
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index 3bc3935078baada..b2fb762537573ef 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
 void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
                                             const Preprocessor *PP) {
   // Build the TextDiagnostic utility.
-  TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts));
+  TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP));
 }
 
 void TextDiagnosticPrinter::EndSourceFile() {

>From fec4eeee662aca00833f0b60fa42be03bbe487de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 21 Sep 2023 06:38:24 +0200
Subject: [PATCH 4/5] Tune colors

---
 clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 +-
 clang/lib/Frontend/CodeSnippetHighlighter.cpp         | 6 +++---
 clang/lib/Frontend/TextDiagnostic.cpp                 | 7 ++++---
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index ec03375221f9ffc..c2a0184085d5da4 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -19,7 +19,7 @@ namespace clang {
 struct StyleRange {
   unsigned Start;
   unsigned End;
-  const enum llvm::raw_ostream::Colors c;
+  const enum llvm::raw_ostream::Colors color;
 };
 
 class Preprocessor;
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index 32bd61f3746023c..dba7f5d2848505a 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -27,9 +27,9 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
     StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
   if (!PP)
     return {};
-  constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK;
-  constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
-  constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW;
+  constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
+  constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
+  constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
 
   SourceManager FakeSM = createTempSourceManager();
   const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 8675d3b08e86b71..5ba78f9e51e1259 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <optional>
@@ -1307,14 +1308,14 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
       std::optional<enum raw_ostream::Colors> H;
       for (auto &P : Styles) {
         if (P.Start < I && P.End >= I) {
-          H = P.c;
+          H = P.color;
           break;
         }
       }
 
-      if (H) {
+      if (H)
         OS.changeColor(*H, false);
-      } else
+      else
         OS.resetColor();
     }
 

>From c567a2be8b24a6ce227682fdd74f4d782a1de37d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 21 Sep 2023 11:01:43 +0200
Subject: [PATCH 5/5] Lex the entire file

---
 .../clang/Frontend/CodeSnippetHighlighter.h   |   6 +-
 clang/include/clang/Frontend/TextDiagnostic.h |   2 +-
 clang/lib/Frontend/CodeSnippetHighlighter.cpp | 139 ++++++++++++------
 clang/lib/Frontend/TextDiagnostic.cpp         |   6 +-
 4 files changed, 107 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
index c2a0184085d5da4..83cd8aebeba12dd 100644
--- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h
+++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h
@@ -23,6 +23,8 @@ struct StyleRange {
 };
 
 class Preprocessor;
+class FileID;
+class SourceManager;
 
 class CodeSnippetHighlighter final {
 public:
@@ -31,9 +33,9 @@ class CodeSnippetHighlighter final {
   /// Produce StyleRanges for the given line.
   /// The returned vector contains non-overlapping style ranges. They are sorted
   /// from beginning of the line to the end.
-  std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine,
+  std::vector<StyleRange> highlightLine(unsigned LineNumber,
                                         const Preprocessor *PP,
-                                        const LangOptions &LangOpts);
+                                        const LangOptions &LangOpts, FileID FID, const SourceManager &SM);
 };
 
 } // namespace clang
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 43c39ff96a2d1ce..d6dbef1746efb44 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -104,7 +104,7 @@ class TextDiagnostic : public DiagnosticRenderer {
                            SmallVectorImpl<CharSourceRange> &Ranges,
                            ArrayRef<FixItHint> Hints);
 
-  void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
+  void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, FileID FID, const SourceManager &SM,
                    unsigned LineNo);
 
   void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
index dba7f5d2848505a..446233e77e6d17b 100644
--- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp
+++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp
@@ -9,34 +9,46 @@
 
 using namespace clang;
 
-static SourceManager createTempSourceManager() {
-  FileSystemOptions FileOpts;
-  FileManager FileMgr(FileOpts);
-  llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
-  llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
-  DiagnosticsEngine diags(DiagIDs, DiagOpts);
-  return SourceManager(diags, FileMgr);
-}
-
-static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM,
-                             const LangOptions &LangOpts) {
-  return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts);
-}
+static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
+static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
 
 std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
-    StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) {
+    unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, FileID FID, const SourceManager &SM) {
   if (!PP)
     return {};
-  constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN;
-  constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN;
-  constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
 
-  SourceManager FakeSM = createTempSourceManager();
-  const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine);
-  Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts);
+  // Classify the given token and append it to the given vector.
+  auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, const Token &T, unsigned Start, unsigned Length) -> void {
+    if (T.is(tok::raw_identifier)) {
+      StringRef RawIdent = T.getRawIdentifier();
+      // Special case true/false/nullptr literals, since they will otherwise be
+      // treated as keywords.
+      if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
+        Vec.push_back(
+            StyleRange{Start, Start + Length, LiteralColor});
+      } else {
+        const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
+        assert(II);
+
+        if (II->isKeyword(LangOpts)) {
+          Vec.push_back(
+              StyleRange{Start, Start + Length, KeywordColor});
+        }
+      }
+    } else if (tok::isLiteral(T.getKind())) {
+      Vec.push_back(StyleRange{Start, Start + Length, LiteralColor});
+    } else if (T.is(tok::comment)) {
+      Vec.push_back(StyleRange{Start, Start + Length, CommentColor});
+    }
+  };
+
+  auto Buff = SM.getBufferOrNone(FID);
+  assert(Buff);
+  Lexer L = Lexer(FID, *Buff, SM, LangOpts);
   L.SetKeepWhitespaceMode(true);
+  std::vector<std::vector<StyleRange>> Lines;
 
-  std::vector<StyleRange> Styles;
   bool Stop = false;
   while (!Stop) {
     Token T;
@@ -45,33 +57,78 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine(
       continue;
 
     bool Invalid;
-    unsigned Start =
-        FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+    unsigned StartCol =
+        SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+    if (Invalid)
+      continue;
+    unsigned StartLine =
+      SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1;
     if (Invalid)
       continue;
 
-    if (T.is(tok::raw_identifier)) {
-      StringRef RawIdent = T.getRawIdentifier();
-      // Special case true/false/nullptr literals, since they will otherwise be
-      // treated as keywords.
-      if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") {
-        Styles.push_back(
-            StyleRange{Start, Start + T.getLength(), LiteralColor});
-      } else {
-        const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
-        assert(II);
+    while (Lines.size() <= StartLine)
+      Lines.push_back({});
 
-        if (II->isKeyword(LangOpts)) {
-          Styles.push_back(
-              StyleRange{Start, Start + T.getLength(), KeywordColor});
+
+    unsigned EndLine = 
+      SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1;
+    if (Invalid)
+      continue;
+
+    // Simple tokens.
+    if (StartLine == EndLine) {
+      appendStyle(Lines[StartLine], T, StartCol, T.getLength());
+      continue;
+    }
+    unsigned NumLines = EndLine - StartLine;
+
+    // For tokens that span multiple lines (think multiline comments), we
+    // divide them into multiple StyleRanges.
+    unsigned EndCol =
+        SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
+    if (Invalid)
+      continue;
+
+    std::string Spelling = Lexer::getSpelling(T, SM , LangOpts);
+
+    unsigned L = 0;
+    unsigned LineLength = 0;
+    for (unsigned I = 0; I <= Spelling.size(); ++I) {
+      // This line is done.
+      if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) {
+        if (Lines.size() <= StartLine + L) {
+          Lines.push_back({});
         }
+
+        if (L == 0) // First line
+          appendStyle(Lines[StartLine + L], T, StartCol, LineLength);
+        else if (L == NumLines) // Last line
+          appendStyle(Lines[StartLine + L], T, 0, EndCol);
+        else
+          appendStyle(Lines[StartLine + L], T, 0, LineLength);
+        ++L;
+        LineLength = 0;
+        continue;
       }
-    } else if (tok::isLiteral(T.getKind())) {
-      Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor});
-    } else if (T.is(tok::comment)) {
-      Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor});
+      ++LineLength;
     }
   }
 
-  return Styles;
+
+#if 0
+  llvm::errs() << "--\nLine Style info: \n";
+  int I = 0;
+  for (std::vector<StyleRange> &Line : Lines) {
+    llvm::errs() << I << ": ";
+    for (const auto &R : Line) {
+      llvm::errs() << "{" << R.Start << ", " << R.End << "}, ";
+    }
+    llvm::errs() << "\n";
+
+    ++I;
+  }
+#endif
+
+
+  return Lines[LineNumber];
 }
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 5ba78f9e51e1259..f762e112de066e3 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -1249,7 +1249,7 @@ void TextDiagnostic::emitSnippetAndCaret(
     }
 
     // Emit what we have computed.
-    emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
+    emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, DisplayLineNo);
 
     if (!CaretLine.empty()) {
       indentForLineNumbers();
@@ -1279,9 +1279,11 @@ void TextDiagnostic::emitSnippetAndCaret(
 
 void TextDiagnostic::emitSnippet(StringRef SourceLine,
                                  unsigned MaxLineNoDisplayWidth,
+                                 FileID FID,
+                                 const SourceManager &SM,
                                  unsigned LineNo) {
   std::vector<StyleRange> Styles =
-      SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts);
+      SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM);
 
   // Emit line number.
   if (MaxLineNoDisplayWidth > 0) {



More information about the cfe-commits mailing list