[clang-tools-extra] [clangd] Improve Markup Rendering (PR #140498)
via cfe-commits
cfe-commits at lists.llvm.org
Sun May 18 23:47:00 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangd
@llvm/pr-subscribers-clang-tools-extra
Author: None (tcottin)
<details>
<summary>Changes</summary>
This is a preparation for fixing clangd/clangd#<!-- -->529.
It changes the Markup rendering to markdown and plaintext.
- Properly separate paragraphs using an empty line between
- Dont escape markdown syntax for markdown output except for HTML
- Dont do any formatting for markdown because the client is handling the actual markdown rendering
---
Patch is 41.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140498.diff
7 Files Affected:
- (modified) clang-tools-extra/clangd/Hover.cpp (+13-68)
- (modified) clang-tools-extra/clangd/support/Markup.cpp (+147-105)
- (modified) clang-tools-extra/clangd/support/Markup.h (+26-6)
- (modified) clang-tools-extra/clangd/test/signature-help.test (+2-2)
- (modified) clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp (+4-4)
- (modified) clang-tools-extra/clangd/unittests/HoverTests.cpp (+51-24)
- (modified) clang-tools-extra/clangd/unittests/support/MarkupTests.cpp (+167-47)
``````````diff
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 3ab3d89030520..88755733aa67c 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -960,42 +960,6 @@ std::optional<HoverInfo> getHoverContents(const Attr *A, ParsedAST &AST) {
return HI;
}
-bool isParagraphBreak(llvm::StringRef Rest) {
- return Rest.ltrim(" \t").starts_with("\n");
-}
-
-bool punctuationIndicatesLineBreak(llvm::StringRef Line) {
- constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
-
- Line = Line.rtrim();
- return !Line.empty() && Punctuation.contains(Line.back());
-}
-
-bool isHardLineBreakIndicator(llvm::StringRef Rest) {
- // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
- // '#' headings, '`' code blocks
- constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
-
- Rest = Rest.ltrim(" \t");
- if (Rest.empty())
- return false;
-
- if (LinebreakIndicators.contains(Rest.front()))
- return true;
-
- if (llvm::isDigit(Rest.front())) {
- llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
- if (AfterDigit.starts_with(".") || AfterDigit.starts_with(")"))
- return true;
- }
- return false;
-}
-
-bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) {
- // Should we also consider whether Line is short?
- return punctuationIndicatesLineBreak(Line) || isHardLineBreakIndicator(Rest);
-}
-
void addLayoutInfo(const NamedDecl &ND, HoverInfo &HI) {
if (ND.isInvalidDecl())
return;
@@ -1601,51 +1565,32 @@ std::optional<llvm::StringRef> getBacktickQuoteRange(llvm::StringRef Line,
return Line.slice(Offset, Next + 1);
}
-void parseDocumentationLine(llvm::StringRef Line, markup::Paragraph &Out) {
+void parseDocumentationParagraph(llvm::StringRef Text, markup::Paragraph &Out) {
// Probably this is appendText(Line), but scan for something interesting.
- for (unsigned I = 0; I < Line.size(); ++I) {
- switch (Line[I]) {
+ for (unsigned I = 0; I < Text.size(); ++I) {
+ switch (Text[I]) {
case '`':
- if (auto Range = getBacktickQuoteRange(Line, I)) {
- Out.appendText(Line.substr(0, I));
+ if (auto Range = getBacktickQuoteRange(Text, I)) {
+ Out.appendText(Text.substr(0, I));
Out.appendCode(Range->trim("`"), /*Preserve=*/true);
- return parseDocumentationLine(Line.substr(I + Range->size()), Out);
+ return parseDocumentationParagraph(Text.substr(I + Range->size()), Out);
}
break;
}
}
- Out.appendText(Line).appendSpace();
+ Out.appendText(Text);
}
void parseDocumentation(llvm::StringRef Input, markup::Document &Output) {
- std::vector<llvm::StringRef> ParagraphLines;
- auto FlushParagraph = [&] {
- if (ParagraphLines.empty())
- return;
- auto &P = Output.addParagraph();
- for (llvm::StringRef Line : ParagraphLines)
- parseDocumentationLine(Line, P);
- ParagraphLines.clear();
- };
+ llvm::StringRef Paragraph, Rest;
+ for (std::tie(Paragraph, Rest) = Input.split("\n\n");
+ !(Paragraph.empty() && Rest.empty());
+ std::tie(Paragraph, Rest) = Rest.split("\n\n")) {
- llvm::StringRef Line, Rest;
- for (std::tie(Line, Rest) = Input.split('\n');
- !(Line.empty() && Rest.empty());
- std::tie(Line, Rest) = Rest.split('\n')) {
-
- // After a linebreak remove spaces to avoid 4 space markdown code blocks.
- // FIXME: make FlushParagraph handle this.
- Line = Line.ltrim();
- if (!Line.empty())
- ParagraphLines.push_back(Line);
-
- if (isParagraphBreak(Rest) || isHardLineBreakAfter(Line, Rest)) {
- FlushParagraph();
- }
+ if (!Paragraph.empty())
+ parseDocumentationParagraph(Paragraph, Output.addParagraph());
}
- FlushParagraph();
}
-
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const HoverInfo::PrintedType &T) {
OS << T.Type;
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 63aff96b02056..b1e6252e473f5 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -11,7 +11,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <iterator>
@@ -56,80 +55,28 @@ bool looksLikeTag(llvm::StringRef Contents) {
return true; // Potentially incomplete tag.
}
-// Tests whether C should be backslash-escaped in markdown.
-// The string being escaped is Before + C + After. This is part of a paragraph.
-// StartsLine indicates whether `Before` is the start of the line.
-// After may not be everything until the end of the line.
-//
-// It's always safe to escape punctuation, but want minimal escaping.
-// The strategy is to escape the first character of anything that might start
-// a markdown grammar construct.
-bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
- bool StartsLine) {
- assert(Before.take_while(llvm::isSpace).empty());
- auto RulerLength = [&]() -> /*Length*/ unsigned {
- if (!StartsLine || !Before.empty())
- return false;
- llvm::StringRef A = After.rtrim();
- return llvm::all_of(A, [C](char D) { return C == D; }) ? 1 + A.size() : 0;
- };
- auto IsBullet = [&]() {
- return StartsLine && Before.empty() &&
- (After.empty() || After.starts_with(" "));
- };
- auto SpaceSurrounds = [&]() {
- return (After.empty() || llvm::isSpace(After.front())) &&
- (Before.empty() || llvm::isSpace(Before.back()));
- };
- auto WordSurrounds = [&]() {
- return (!After.empty() && llvm::isAlnum(After.front())) &&
- (!Before.empty() && llvm::isAlnum(Before.back()));
- };
-
+/// \brief Tests whether \p C should be backslash-escaped in markdown.
+///
+/// The MarkupContent LSP specification defines that `markdown` content needs to
+/// follow GFM (GitHub Flavored Markdown) rules. And we can assume that markdown
+/// is rendered on the client side. This means we do not need to escape any
+/// markdown constructs.
+/// The only exception is when the client does not support HTML rendering in
+/// markdown. In that case, we need to escape HTML tags and HTML entities.
+///
+/// **FIXME:** handle the case when the client does support HTML rendering in
+/// markdown. For this, the LSP server needs to check the
+/// [supportsHtml capability](https://github.com/microsoft/language-server-protocol/issues/1344)
+/// of the client.
+///
+/// \param C The character to check.
+/// \param After The string that follows \p C . This is used to determine if \p C is
+/// part of a tag or an entity reference.
+/// \returns true if \p C should be escaped, false otherwise.
+bool needsLeadingEscape(char C, llvm::StringRef After) {
switch (C) {
- case '\\': // Escaped character.
- return true;
- case '`': // Code block or inline code
- // Any number of backticks can delimit an inline code block that can end
- // anywhere (including on another line). We must escape them all.
- return true;
- case '~': // Code block
- return StartsLine && Before.empty() && After.starts_with("~~");
- case '#': { // ATX heading.
- if (!StartsLine || !Before.empty())
- return false;
- llvm::StringRef Rest = After.ltrim(C);
- return Rest.empty() || Rest.starts_with(" ");
- }
- case ']': // Link or link reference.
- // We escape ] rather than [ here, because it's more constrained:
- // ](...) is an in-line link
- // ]: is a link reference
- // The following are only links if the link reference exists:
- // ] by itself is a shortcut link
- // ][...] is an out-of-line link
- // Because we never emit link references, we don't need to handle these.
- return After.starts_with(":") || After.starts_with("(");
- case '=': // Setex heading.
- return RulerLength() > 0;
- case '_': // Horizontal ruler or matched delimiter.
- if (RulerLength() >= 3)
- return true;
- // Not a delimiter if surrounded by space, or inside a word.
- // (The rules at word boundaries are subtle).
- return !(SpaceSurrounds() || WordSurrounds());
- case '-': // Setex heading, horizontal ruler, or bullet.
- if (RulerLength() > 0)
- return true;
- return IsBullet();
- case '+': // Bullet list.
- return IsBullet();
- case '*': // Bullet list, horizontal ruler, or delimiter.
- return IsBullet() || RulerLength() >= 3 || !SpaceSurrounds();
case '<': // HTML tag (or autolink, which we choose not to escape)
return looksLikeTag(After);
- case '>': // Quote marker. Needs escaping at start of line.
- return StartsLine && Before.empty();
case '&': { // HTML entity reference
auto End = After.find(';');
if (End == llvm::StringRef::npos)
@@ -142,10 +89,6 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
}
return llvm::all_of(Content, llvm::isAlpha);
}
- case '.': // Numbered list indicator. Escape 12. -> 12\. at start of line.
- case ')':
- return StartsLine && !Before.empty() &&
- llvm::all_of(Before, llvm::isDigit) && After.starts_with(" ");
default:
return false;
}
@@ -156,8 +99,7 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
std::string renderText(llvm::StringRef Input, bool StartsLine) {
std::string R;
for (unsigned I = 0; I < Input.size(); ++I) {
- if (needsLeadingEscape(Input[I], Input.substr(0, I), Input.substr(I + 1),
- StartsLine))
+ if (needsLeadingEscape(Input[I], Input.substr(I + 1)))
R.push_back('\\');
R.push_back(Input[I]);
}
@@ -303,11 +245,12 @@ class CodeBlock : public Block {
std::string indentLines(llvm::StringRef Input) {
assert(!Input.ends_with("\n") && "Input should've been trimmed.");
std::string IndentedR;
- // We'll add 2 spaces after each new line.
+ // We'll add 2 spaces after each new line which is not followed by another new line.
IndentedR.reserve(Input.size() + Input.count('\n') * 2);
- for (char C : Input) {
+ for (size_t I = 0; I < Input.size(); ++I) {
+ char C = Input[I];
IndentedR += C;
- if (C == '\n')
+ if (C == '\n' && (((I + 1) < Input.size()) && (Input[I + 1] != '\n')))
IndentedR.append(" ");
}
return IndentedR;
@@ -348,20 +291,24 @@ void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
if (C.SpaceBefore || NeedsSpace)
OS << " ";
switch (C.Kind) {
- case Chunk::PlainText:
+ case ChunkKind::PlainText:
OS << renderText(C.Contents, !HasChunks);
break;
- case Chunk::InlineCode:
+ case ChunkKind::InlineCode:
OS << renderInlineBlock(C.Contents);
break;
+ case ChunkKind::Bold:
+ OS << "**" << renderText(C.Contents, !HasChunks) << "**";
+ break;
+ case ChunkKind::Emphasized:
+ OS << "*" << renderText(C.Contents, !HasChunks) << "*";
+ break;
}
HasChunks = true;
NeedsSpace = C.SpaceAfter;
}
- // Paragraphs are translated into markdown lines, not markdown paragraphs.
- // Therefore it only has a single linebreak afterwards.
- // VSCode requires two spaces at the end of line to start a new one.
- OS << " \n";
+ // A paragraph in markdown is separated by a blank line.
+ OS << "\n\n";
}
std::unique_ptr<Block> Paragraph::clone() const {
@@ -370,8 +317,8 @@ std::unique_ptr<Block> Paragraph::clone() const {
/// Choose a marker to delimit `Text` from a prioritized list of options.
/// This is more readable than escaping for plain-text.
-llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
- llvm::StringRef Text) {
+llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
+ llvm::StringRef Text) const {
// Prefer a delimiter whose characters don't appear in the text.
for (llvm::StringRef S : Options)
if (Text.find_first_of(S) == llvm::StringRef::npos)
@@ -379,18 +326,94 @@ llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
return Options.front();
}
+bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const{
+ constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
+
+ Line = Line.rtrim();
+ return !Line.empty() && Punctuation.contains(Line.back());
+}
+
+bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
+ // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
+ // '#' headings, '`' code blocks, two spaces (markdown force newline)
+ constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
+
+ Rest = Rest.ltrim(" \t");
+ if (Rest.empty())
+ return false;
+
+ if (LinebreakIndicators.contains(Rest.front()))
+ return true;
+
+ if (llvm::isDigit(Rest.front())) {
+ llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
+ if (AfterDigit.starts_with(".") || AfterDigit.starts_with(")"))
+ return true;
+ }
+ return false;
+}
+
+bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
+ llvm::StringRef Rest) const {
+ // In Markdown, 2 spaces before a line break forces a line break.
+ // Add a line break for plaintext in this case too.
+ // Should we also consider whether Line is short?
+ return Line.ends_with(" ") || punctuationIndicatesLineBreak(Line) ||
+ isHardLineBreakIndicator(Rest);
+}
+
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
bool NeedsSpace = false;
+ std::string ConcatenatedText;
+ llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
+
for (auto &C : Chunks) {
+
+ if (C.Kind == ChunkKind::PlainText) {
+ if (C.SpaceBefore || NeedsSpace)
+ ConcatenatedOS << ' ';
+
+ ConcatenatedOS << C.Contents;
+ NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
+ continue;
+ }
+
if (C.SpaceBefore || NeedsSpace)
- OS << " ";
+ ConcatenatedOS << ' ';
llvm::StringRef Marker = "";
- if (C.Preserve && C.Kind == Chunk::InlineCode)
+ if (C.Preserve && C.Kind == ChunkKind::InlineCode)
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
- OS << Marker << C.Contents << Marker;
+ else if (C.Kind == ChunkKind::Bold)
+ Marker = "**";
+ else if (C.Kind == ChunkKind::Emphasized)
+ Marker = "*";
+ ConcatenatedOS << Marker << C.Contents << Marker;
NeedsSpace = C.SpaceAfter;
}
- OS << '\n';
+
+ // We go through the contents line by line to handle the newlines
+ // and required spacing correctly.
+ llvm::StringRef Line, Rest;
+
+ for (std::tie(Line, Rest) =
+ llvm::StringRef(ConcatenatedText).trim().split('\n');
+ !(Line.empty() && Rest.empty());
+ std::tie(Line, Rest) = Rest.split('\n')) {
+
+ Line = Line.ltrim();
+ if (Line.empty())
+ continue;
+
+ OS << canonicalizeSpaces(Line);
+
+ if (isHardLineBreakAfter(Line, Rest))
+ OS << '\n';
+ else if (!Rest.empty())
+ OS << ' ';
+ }
+
+ // Paragraphs are separated by a blank line.
+ OS << "\n\n";
}
BulletList::BulletList() = default;
@@ -398,12 +421,13 @@ BulletList::~BulletList() = default;
void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
for (auto &D : Items) {
+ std::string M = D.asMarkdown();
// Instead of doing this we might prefer passing Indent to children to get
// rid of the copies, if it turns out to be a bottleneck.
- OS << "- " << indentLines(D.asMarkdown()) << '\n';
+ OS << "- " << indentLines(M) << '\n';
}
// We need a new line after list to terminate it in markdown.
- OS << '\n';
+ OS << "\n\n";
}
void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
@@ -412,6 +436,7 @@ void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
// rid of the copies, if it turns out to be a bottleneck.
OS << "- " << indentLines(D.asPlainText()) << '\n';
}
+ OS << '\n';
}
Paragraph &Paragraph::appendSpace() {
@@ -420,29 +445,44 @@ Paragraph &Paragraph::appendSpace() {
return *this;
}
-Paragraph &Paragraph::appendText(llvm::StringRef Text) {
- std::string Norm = canonicalizeSpaces(Text);
- if (Norm.empty())
+Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) {
+ if (Contents.empty())
return *this;
Chunks.emplace_back();
Chunk &C = Chunks.back();
- C.Contents = std::move(Norm);
- C.Kind = Chunk::PlainText;
- C.SpaceBefore = llvm::isSpace(Text.front());
- C.SpaceAfter = llvm::isSpace(Text.back());
+ C.Contents = std::move(Contents);
+ C.Kind = K;
return *this;
}
+Paragraph &Paragraph::appendText(llvm::StringRef Text) {
+ if (!Chunks.empty() && Chunks.back().Kind == ChunkKind::PlainText) {
+ Chunks.back().Contents += std::move(Text);
+ return *this;
+ }
+
+ return appendChunk(Text, ChunkKind::PlainText);
+}
+
+Paragraph &Paragraph::appendEmphasizedText(llvm::StringRef Text) {
+ return appendChunk(canonicalizeSpaces(std::move(Text)),
+ ChunkKind::Emphasized);
+}
+
+Paragraph &Paragraph::appendBoldText(llvm::StringRef Text) {
+ return appendChunk(canonicalizeSpaces(std::move(Text)), ChunkKind::Bold);
+}
+
Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
bool AdjacentCode =
- !Chunks.empty() && Chunks.back().Kind == Chunk::InlineCode;
+ !Chunks.empty() && Chunks.back().Kind == ChunkKind::InlineCode;
std::string Norm = canonicalizeSpaces(std::move(Code));
if (Norm.empty())
return *this;
Chunks.emplace_back();
Chunk &C = Chunks.back();
C.Contents = std::move(Norm);
- C.Kind = Chunk::InlineCode;
+ C.Kind = ChunkKind::InlineCode;
C.Preserve = Preserve;
// Disallow adjacent code spans without spaces, markdown can't render them.
C.SpaceBefore = AdjacentCode;
@@ -475,7 +515,9 @@ Paragraph &Document::addParagraph() {
return *static_cast<Paragraph *>(Children.back().get());
}
-void Document::addRuler() { Children.push_back(std::make_unique<Ruler>()); }
+void Document::addRuler() {
+ Children.push_back(std::make_unique<Ruler>());
+}
void Document::addCodeBlock(std::string Code, std::string Language) {
Children.emplace_back(
diff --git a/clang-tools-extra/clangd/support/Markup.h b/clang-tools-extra/clangd/support/Markup.h
index 3a869c49a2cbb..a74fade13d115 100644
--- a/clang-tools-extra/clangd/support/Markup.h
+++ b/clang-tools-extra/clangd/support/Markup.h
@@ -49,6 +49,12 @@ class Paragraph : public Block {
/// Append plain text to the end of the string.
Paragraph &appendText(llvm::StringRef Text);
+ /// Append emphasized text, this translates to the * block in markdown.
+ Paragraph &appendEmphasizedText(llvm::StringRef Text);
+
+ /// Append bold text, this translates to the ** block in markdown.
+ Paragraph &appendBoldText(llvm::StringRef Text);
+
/// Append inline code, this translates to the ` block in markdown.
/// \p Preserve indicates the code span must be apparent even in plaintext.
Paragraph &appendCode(llvm::StringRef Code, bool Preserve = false);
@@ -58,11 +64,9 @@ class Paragraph : public Block {
Paragraph &appendSpace();
private:
+ typedef enum { PlainText, InlineCode, Bold, Emphasized } ChunkKind;
struct Chunk {
- enum {
- PlainText,
- InlineCode,
- } Kind = PlainText;
+ ChunkKind Kind = PlainText;
// Preserve chunk markers in plaintext.
bool Preserve = false;
std::string Contents;
@@ -73,6 +77,19 @@ class Paragraph : public Block {
bool SpaceAfter = false;
};
std::vector<Chunk> Chunks;
+
+ Paragraph &appendChunk(llvm::StringRef Contents, ChunkKind K);
+
+ llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
+ llvm::StringRef Text) const;
+ bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
+ bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
+ bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
+};
+
+class ListItemParagraph : public Paragraph {
+public:
+ void renderMarkdown(llvm::raw_ostream &OS) const override;
};
/// Represents a sequence of one or more documents. Knows how to print them in a
@@ -82,6 +99,9 @@ class BulletList : public Block {
BulletList();
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140498
More information about the cfe-commits
mailing list