[clang-tools-extra] [clang-doc] Wire Markdown parser into MDGenerator for fenced code blocks (PR #201746)
Neil Nair via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 4 23:24:38 PDT 2026
https://github.com/Neil-N4 updated https://github.com/llvm/llvm-project/pull/201746
>From 87ed388807b9239da05c1433ae253456f44fcf1f Mon Sep 17 00:00:00 2001
From: Neil-N4 <65729206+Neil-N4 at users.noreply.github.com>
Date: Thu, 28 May 2026 19:23:48 -0400
Subject: [PATCH 01/13] [clang-doc] Add standalone Markdown parsing library
---
clang-tools-extra/clang-doc/CMakeLists.txt | 1 +
clang-tools-extra/clang-doc/Markdown.cpp | 133 +++++++++++++++++++++
clang-tools-extra/clang-doc/Markdown.h | 59 +++++++++
3 files changed, 193 insertions(+)
create mode 100644 clang-tools-extra/clang-doc/Markdown.cpp
create mode 100644 clang-tools-extra/clang-doc/Markdown.h
diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt
index 22e2c8159e9f6..4f69385bdccc3 100644
--- a/clang-tools-extra/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/CMakeLists.txt
@@ -12,6 +12,7 @@ add_clang_library(clangDoc STATIC
Generators.cpp
HTMLGenerator.cpp
Mapper.cpp
+ Markdown.cpp
MDGenerator.cpp
Representation.cpp
Serialize.cpp
diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp
new file mode 100644
index 0000000000000..87053c94b0566
--- /dev/null
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -0,0 +1,133 @@
+//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+static MDNode makeText(llvm::StringRef S) {
+ return {NodeKind::Text, S, {}};
+}
+
+// A line is a table separator if it only contains |, -, :, and spaces,
+// and has at least one -.
+static bool isSepRow(llvm::StringRef Line) {
+ return llvm::all_of(Line, [](char C) {
+ return C == '|' || C == '-' || C == ':' || C == ' ';
+ }) && Line.contains('-');
+}
+
+static llvm::ArrayRef<MDNode>
+allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
+ llvm::BumpPtrAllocator &Arena) {
+ if (Nodes.empty())
+ return {};
+ MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size());
+ std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated);
+ return llvm::ArrayRef<MDNode>(Allocated, Nodes.size());
+}
+
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+ llvm::BumpPtrAllocator &Arena) {
+ if (ParagraphText.trim().empty())
+ return {};
+
+ llvm::SmallVector<llvm::StringRef, 16> Lines;
+ ParagraphText.split(Lines, '\n');
+
+ llvm::SmallVector<MDNode, 8> Nodes;
+ unsigned I = 0;
+
+ while (I < Lines.size()) {
+ llvm::StringRef Line = Lines[I].trim();
+
+ if (Line.empty()) {
+ ++I;
+ continue;
+ }
+
+ // Fenced code block: ``` or ~~~
+ if (Line.starts_with("```") || Line.starts_with("~~~")) {
+ char Fence = Line[0];
+ llvm::StringRef Lang = Line.drop_front(3).trim();
+ llvm::SmallVector<MDNode, 4> CodeLines;
+ ++I;
+ while (I < Lines.size()) {
+ llvm::StringRef CodeLine = Lines[I].trim();
+ if (CodeLine.size() >= 3 &&
+ llvm::all_of(CodeLine.take_front(3),
+ [Fence](char C) { return C == Fence; }))
+ break;
+ CodeLines.push_back(makeText(Lines[I]));
+ ++I;
+ }
+ ++I; // skip closing fence
+ MDNode Code;
+ Code.Kind = NodeKind::FencedCode;
+ Code.Content = Lang;
+ Code.Children = allocateNodes(CodeLines, Arena);
+ Nodes.push_back(Code);
+ continue;
+ }
+
+ // Pipe table: current line has | and next line is a separator row
+ if (Line.contains('|') && I + 1 < Lines.size() &&
+ isSepRow(Lines[I + 1].trim())) {
+ llvm::SmallVector<MDNode, 4> Rows;
+ while (I < Lines.size() && Lines[I].trim().contains('|')) {
+ Rows.push_back(makeText(Lines[I].trim()));
+ ++I;
+ }
+ MDNode Table;
+ Table.Kind = NodeKind::Table;
+ Table.Content = {};
+ Table.Children = allocateNodes(Rows, Arena);
+ Nodes.push_back(Table);
+ continue;
+ }
+
+ // Unordered list item
+ if (Line.starts_with("- ") || Line.starts_with("* ") ||
+ Line.starts_with("+ ")) {
+ llvm::SmallVector<MDNode, 4> Items;
+ while (I < Lines.size()) {
+ llvm::StringRef L = Lines[I].trim();
+ if (!L.starts_with("- ") && !L.starts_with("* ") &&
+ !L.starts_with("+ "))
+ break;
+ MDNode Item;
+ Item.Kind = NodeKind::ListItem;
+ Item.Content = L.drop_front(2).trim();
+ Item.Children = {};
+ Items.push_back(Item);
+ ++I;
+ }
+ MDNode List;
+ List.Kind = NodeKind::UnorderedList;
+ List.Content = {};
+ List.Children = allocateNodes(Items, Arena);
+ Nodes.push_back(List);
+ continue;
+ }
+
+ // Plain text fallback
+ Nodes.push_back(makeText(Line));
+ ++I;
+ }
+
+ return allocateNodes(Nodes, Arena);
+}
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h
new file mode 100644
index 0000000000000..c3374f06e2278
--- /dev/null
+++ b/clang-tools-extra/clang-doc/Markdown.h
@@ -0,0 +1,59 @@
+//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a standalone Markdown parsing library for the LLVM
+// ecosystem. The parser takes plain text and returns a tree of typed nodes
+// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+enum class NodeKind : uint8_t {
+ // Block nodes
+ Paragraph,
+ FencedCode,
+ Table,
+ UnorderedList,
+ OrderedList,
+ ListItem,
+ ThematicBreak,
+ // Inline nodes
+ Text,
+ InlineCode,
+ Emphasis,
+ Strong,
+ SoftBreak,
+};
+
+struct MDNode {
+ NodeKind Kind;
+ llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
+ llvm::ArrayRef<MDNode> Children; // arena allocated
+};
+
+// Parses Markdown from a single comment paragraph's text.
+// Returns an empty ArrayRef if no Markdown constructs are found,
+// so generators can fall back to plain-text rendering at zero cost.
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+ llvm::BumpPtrAllocator &Arena);
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file
>From d350bf1e3fb4090bc65bcd7e5666e87c7b319b18 Mon Sep 17 00:00:00 2001
From: Neil-N4 <65729206+Neil-N4 at users.noreply.github.com>
Date: Thu, 28 May 2026 19:34:38 -0400
Subject: [PATCH 02/13] [clang-doc] Fix formatting
---
clang-tools-extra/clang-doc/Markdown.cpp | 8 +++++---
clang-tools-extra/clang-doc/Markdown.h | 2 +-
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp
index 87053c94b0566..904d8e92dff17 100644
--- a/clang-tools-extra/clang-doc/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -22,9 +22,11 @@ static MDNode makeText(llvm::StringRef S) {
// A line is a table separator if it only contains |, -, :, and spaces,
// and has at least one -.
static bool isSepRow(llvm::StringRef Line) {
- return llvm::all_of(Line, [](char C) {
- return C == '|' || C == '-' || C == ':' || C == ' ';
- }) && Line.contains('-');
+ return llvm::all_of(Line,
+ [](char C) {
+ return C == '|' || C == '-' || C == ':' || C == ' ';
+ }) &&
+ Line.contains('-');
}
static llvm::ArrayRef<MDNode>
diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h
index c3374f06e2278..bf4815e068b53 100644
--- a/clang-tools-extra/clang-doc/Markdown.h
+++ b/clang-tools-extra/clang-doc/Markdown.h
@@ -42,7 +42,7 @@ enum class NodeKind : uint8_t {
struct MDNode {
NodeKind Kind;
- llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
+ llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
llvm::ArrayRef<MDNode> Children; // arena allocated
};
>From 73a9197525f835ccd3ebee0fc89dad83d19bb0ad Mon Sep 17 00:00:00 2001
From: Neil-N4 <65729206+Neil-N4 at users.noreply.github.com>
Date: Thu, 28 May 2026 19:42:16 -0400
Subject: [PATCH 03/13] [clang-doc] Fix formatting
---
clang-tools-extra/clang-doc/Markdown.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp
index 904d8e92dff17..17ee61369fb6b 100644
--- a/clang-tools-extra/clang-doc/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -15,9 +15,7 @@ namespace clang {
namespace doc {
namespace markdown {
-static MDNode makeText(llvm::StringRef S) {
- return {NodeKind::Text, S, {}};
-}
+static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
// A line is a table separator if it only contains |, -, :, and spaces,
// and has at least one -.
>From 1a899ed0122a7a15787ebe1760225b46289d50ba Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 29 May 2026 14:21:18 -0400
Subject: [PATCH 04/13] [clang-doc] Move Markdown library to support folder,
fix headers and enum prefixes
---
clang-tools-extra/clang-doc/CMakeLists.txt | 1 -
.../clang-doc/support/CMakeLists.txt | 3 +-
.../clang-doc/support/Markdown.cpp | 133 ++++++++++++++++++
.../clang-doc/support/Markdown.h | 59 ++++++++
4 files changed, 194 insertions(+), 2 deletions(-)
create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp
create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h
diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt
index 4f69385bdccc3..22e2c8159e9f6 100644
--- a/clang-tools-extra/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/CMakeLists.txt
@@ -12,7 +12,6 @@ add_clang_library(clangDoc STATIC
Generators.cpp
HTMLGenerator.cpp
Mapper.cpp
- Markdown.cpp
MDGenerator.cpp
Representation.cpp
Serialize.cpp
diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt
index 8ac913ffbe998..acff865190ff9 100644
--- a/clang-tools-extra/clang-doc/support/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt
@@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangDocSupport STATIC
File.cpp
+ Markdown.cpp
Utils.cpp
- )
+ )
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
new file mode 100644
index 0000000000000..17ee61369fb6b
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -0,0 +1,133 @@
+//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
+
+// A line is a table separator if it only contains |, -, :, and spaces,
+// and has at least one -.
+static bool isSepRow(llvm::StringRef Line) {
+ return llvm::all_of(Line,
+ [](char C) {
+ return C == '|' || C == '-' || C == ':' || C == ' ';
+ }) &&
+ Line.contains('-');
+}
+
+// Copies Nodes into storage obtained from Arena and returns a view of that
+// copy. An empty input returns an empty view without touching the arena.
+static llvm::ArrayRef<MDNode>
+allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
+              llvm::BumpPtrAllocator &Arena) {
+  if (Nodes.empty())
+    return {};
+  // ArrayRef::copy allocates from the arena and copy-constructs each element,
+  // so no manual uninitialized_copy (and no <memory> include) is needed.
+  return llvm::ArrayRef<MDNode>(Nodes).copy(Arena);
+}
+
+// Splits ParagraphText into lines and groups them into block-level nodes:
+// fenced code, pipe tables, unordered lists, and plain text for everything
+// else. Blank lines between blocks are skipped. The result is empty only when
+// the input is empty or all whitespace. Node contents are views into
+// ParagraphText; child arrays and the returned array are allocated from Arena.
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena) {
+  if (ParagraphText.trim().empty())
+    return {};
+
+  llvm::SmallVector<llvm::StringRef, 16> Lines;
+  ParagraphText.split(Lines, '\n');
+
+  llvm::SmallVector<MDNode, 8> Nodes;
+  unsigned I = 0;
+
+  while (I < Lines.size()) {
+    llvm::StringRef Line = Lines[I].trim();
+
+    if (Line.empty()) {
+      ++I;
+      continue;
+    }
+
+    // Fenced code block: ``` or ~~~
+    if (Line.starts_with("```") || Line.starts_with("~~~")) {
+      char Fence = Line[0];
+      // The opening run may be longer than three characters so that shorter
+      // runs of the same character can appear inside the block. Everything
+      // after the run is the language tag.
+      size_t FenceLen =
+          Line.take_while([Fence](char C) { return C == Fence; }).size();
+      llvm::StringRef Lang = Line.drop_front(FenceLen).trim();
+      llvm::SmallVector<MDNode, 4> CodeLines;
+      ++I;
+      while (I < Lines.size()) {
+        llvm::StringRef CodeLine = Lines[I].trim();
+        // A closing fence consists only of the fence character and is at
+        // least as long as the opening run; a line such as "```cpp" inside
+        // the block is still code.
+        if (CodeLine.size() >= FenceLen &&
+            CodeLine.find_first_not_of(Fence) == llvm::StringRef::npos)
+          break;
+        // Keep the untrimmed line so indentation inside the block survives.
+        CodeLines.push_back(makeText(Lines[I]));
+        ++I;
+      }
+      ++I; // skip closing fence
+      MDNode Code;
+      Code.Kind = NodeKind::FencedCode;
+      Code.Content = Lang;
+      Code.Children = allocateNodes(CodeLines, Arena);
+      Nodes.push_back(Code);
+      continue;
+    }
+
+    // Pipe table: current line has | and next line is a separator row
+    if (Line.contains('|') && I + 1 < Lines.size() &&
+        isSepRow(Lines[I + 1].trim())) {
+      llvm::SmallVector<MDNode, 4> Rows;
+      // Every consecutive line containing a pipe is a row; the separator row
+      // is stored as a child like any other row.
+      while (I < Lines.size() && Lines[I].trim().contains('|')) {
+        Rows.push_back(makeText(Lines[I].trim()));
+        ++I;
+      }
+      MDNode Table;
+      Table.Kind = NodeKind::Table;
+      Table.Content = {};
+      Table.Children = allocateNodes(Rows, Arena);
+      Nodes.push_back(Table);
+      continue;
+    }
+
+    // Unordered list item
+    if (Line.starts_with("- ") || Line.starts_with("* ") ||
+        Line.starts_with("+ ")) {
+      llvm::SmallVector<MDNode, 4> Items;
+      // Consecutive marker lines form one list; the two-character marker is
+      // dropped from each item's content.
+      while (I < Lines.size()) {
+        llvm::StringRef L = Lines[I].trim();
+        if (!L.starts_with("- ") && !L.starts_with("* ") &&
+            !L.starts_with("+ "))
+          break;
+        MDNode Item;
+        Item.Kind = NodeKind::ListItem;
+        Item.Content = L.drop_front(2).trim();
+        Item.Children = {};
+        Items.push_back(Item);
+        ++I;
+      }
+      MDNode List;
+      List.Kind = NodeKind::UnorderedList;
+      List.Content = {};
+      List.Children = allocateNodes(Items, Arena);
+      Nodes.push_back(List);
+      continue;
+    }
+
+    // Plain text fallback
+    Nodes.push_back(makeText(Line));
+    ++I;
+  }
+
+  return allocateNodes(Nodes, Arena);
+}
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h
new file mode 100644
index 0000000000000..bf4815e068b53
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -0,0 +1,59 @@
+//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a standalone Markdown parsing library for the LLVM
+// ecosystem. The parser takes plain text and returns a tree of typed nodes
+// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+enum class NodeKind : uint8_t {
+ // Block nodes
+ Paragraph,
+ FencedCode,
+ Table,
+ UnorderedList,
+ OrderedList,
+ ListItem,
+ ThematicBreak,
+ // Inline nodes
+ Text,
+ InlineCode,
+ Emphasis,
+ Strong,
+ SoftBreak,
+};
+
+struct MDNode {
+ NodeKind Kind;
+ llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
+ llvm::ArrayRef<MDNode> Children; // arena allocated
+};
+
+// Parses Markdown from a single comment paragraph's text.
+// Returns an empty ArrayRef if no Markdown constructs are found,
+// so generators can fall back to plain-text rendering at zero cost.
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+ llvm::BumpPtrAllocator &Arena);
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file
>From 750b43aacf1705b707ae736e58deb1e55e5d169a Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 29 May 2026 14:27:09 -0400
Subject: [PATCH 05/13] [clang-doc] Fix enum prefixes and file headers
---
.../clang-doc/support/Markdown.cpp | 21 +++++----
.../clang-doc/support/Markdown.h | 45 ++++++++++---------
2 files changed, 33 insertions(+), 33 deletions(-)
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
index 17ee61369fb6b..bbce53fa17156 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -1,4 +1,4 @@
-//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,16 +15,15 @@ namespace clang {
namespace doc {
namespace markdown {
-static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
+static MDNode makeText(llvm::StringRef S) {
+ return {NodeKind::NK_Text, S, {}};
+}
// A line is a table separator if it only contains |, -, :, and spaces,
// and has at least one -.
static bool isSepRow(llvm::StringRef Line) {
- return llvm::all_of(Line,
- [](char C) {
- return C == '|' || C == '-' || C == ':' || C == ' ';
- }) &&
- Line.contains('-');
+ return Line.contains('-') &&
+ Line.find_first_not_of("|-: ") == llvm::StringRef::npos;
}
static llvm::ArrayRef<MDNode>
@@ -73,7 +72,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
}
++I; // skip closing fence
MDNode Code;
- Code.Kind = NodeKind::FencedCode;
+ Code.Kind = NodeKind::NK_FencedCode;
Code.Content = Lang;
Code.Children = allocateNodes(CodeLines, Arena);
Nodes.push_back(Code);
@@ -89,7 +88,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
++I;
}
MDNode Table;
- Table.Kind = NodeKind::Table;
+ Table.Kind = NodeKind::NK_Table;
Table.Content = {};
Table.Children = allocateNodes(Rows, Arena);
Nodes.push_back(Table);
@@ -106,14 +105,14 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
!L.starts_with("+ "))
break;
MDNode Item;
- Item.Kind = NodeKind::ListItem;
+ Item.Kind = NodeKind::NK_ListItem;
Item.Content = L.drop_front(2).trim();
Item.Children = {};
Items.push_back(Item);
++I;
}
MDNode List;
- List.Kind = NodeKind::UnorderedList;
+ List.Kind = NodeKind::NK_UnorderedList;
List.Content = {};
List.Children = allocateNodes(Items, Arena);
Nodes.push_back(List);
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h
index bf4815e068b53..e665170473601 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.h
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -1,15 +1,16 @@
-//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a standalone Markdown parsing library for the LLVM
-// ecosystem. The parser takes plain text and returns a tree of typed nodes
-// with no knowledge of comments, Doxygen, or Clang-Doc internals.
-//
+///
+/// \file
+/// This file defines a standalone Markdown parsing library for the LLVM
+/// ecosystem. The parser takes plain text and returns a tree of typed nodes
+/// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
@@ -23,21 +24,21 @@ namespace clang {
namespace doc {
namespace markdown {
-enum class NodeKind : uint8_t {
+enum class NodeKind {
// Block nodes
- Paragraph,
- FencedCode,
- Table,
- UnorderedList,
- OrderedList,
- ListItem,
- ThematicBreak,
+ NK_Paragraph,
+ NK_FencedCode,
+ NK_Table,
+ NK_UnorderedList,
+ NK_OrderedList,
+ NK_ListItem,
+ NK_ThematicBreak,
// Inline nodes
- Text,
- InlineCode,
- Emphasis,
- Strong,
- SoftBreak,
+ NK_Text,
+ NK_InlineCode,
+ NK_Emphasis,
+ NK_Strong,
+ NK_SoftBreak,
};
struct MDNode {
@@ -46,9 +47,9 @@ struct MDNode {
llvm::ArrayRef<MDNode> Children; // arena allocated
};
-// Parses Markdown from a single comment paragraph's text.
-// Returns an empty ArrayRef if no Markdown constructs are found,
-// so generators can fall back to plain-text rendering at zero cost.
+/// Parses Markdown from a single comment paragraph's text.
+/// Returns an empty ArrayRef only when the text is empty or all whitespace;
+/// text with no Markdown constructs comes back as NK_Text nodes, one per line.
llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
llvm::BumpPtrAllocator &Arena);
>From 434e6328c40d4f10ab70b5a4e28ca70bc1e7edd5 Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 29 May 2026 16:17:59 -0400
Subject: [PATCH 06/13] [clang-doc] Add unit tests for Markdown parser
---
.../unittests/clang-doc/CMakeLists.txt | 4 +-
.../clang-doc/MarkdownParserTest.cpp | 94 +++++++++++++++++++
2 files changed, 97 insertions(+), 1 deletion(-)
create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
index 01b34ec9a791e..b74207ac88fa7 100644
--- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
@@ -26,6 +26,7 @@ add_extra_unittest(ClangDocTests
ClangDocTest.cpp
GeneratorTest.cpp
HTMLGeneratorTest.cpp
+ MarkdownParserTest.cpp
MDGeneratorTest.cpp
MergeTest.cpp
SerializeTest.cpp
@@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests
target_link_libraries(ClangDocTests
PRIVATE
clangDoc
+ clangDocSupport
LLVMTestingSupport
- )
+ )
\ No newline at end of file
diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
new file mode 100644
index 0000000000000..8df5efc7f1d5f
--- /dev/null
+++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/Markdown.h"
+#include "llvm/Support/Allocator.h"
+#include "gtest/gtest.h"
+
+using namespace clang::doc::markdown;
+
+namespace {
+
+// Empty input yields no nodes at all.
+TEST(MarkdownParserTest, EmptyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+// Input made only of whitespace and newlines is treated like empty input.
+TEST(MarkdownParserTest, WhitespaceOnlyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown(" \n \n", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+// A line with no Markdown construct comes back as a single text node.
+TEST(MarkdownParserTest, PlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("hello world", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text);
+  EXPECT_EQ(Nodes[0].Content, "hello world");
+}
+
+// The language tag lands in Content and each code line becomes a child.
+TEST(MarkdownParserTest, FencedCodeBlock) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(Nodes[0].Content, "cpp");
+  ASSERT_EQ(Nodes[0].Children.size(), 1u);
+  EXPECT_EQ(Nodes[0].Children[0].Content, "int x = 0;");
+}
+
+// A fence without a language tag leaves Content empty.
+TEST(MarkdownParserTest, FencedCodeBlockNoLang) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```\nsome code\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(Nodes[0].Content.empty());
+  ASSERT_EQ(Nodes[0].Children.size(), 1u);
+  EXPECT_EQ(Nodes[0].Children[0].Content, "some code");
+}
+
+// An unterminated fence must not crash; the lines seen so far become the
+// children of a single fenced-code node.
+TEST(MarkdownParserTest, UnterminatedFenceStillProducesCodeNode) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(Nodes[0].Children.size(), 1u);
+}
+
+// Header, separator and data row are all stored as children of the table.
+TEST(MarkdownParserTest, PipeTable) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table);
+  EXPECT_EQ(Nodes[0].Children.size(), 3u);
+}
+
+TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("a | b\nc | d", Arena);
+  // No separator row so should not be parsed as a table
+  EXPECT_EQ(Nodes.size(), 2u);
+  for (const auto &Node : Nodes)
+    EXPECT_NE(Node.Kind, NodeKind::NK_Table);
+}
+
+// Consecutive bullet lines form one list; markers are stripped from items.
+TEST(MarkdownParserTest, UnorderedList) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList);
+  ASSERT_EQ(Nodes[0].Children.size(), 3u);
+  EXPECT_EQ(Nodes[0].Children[0].Content, "foo");
+  EXPECT_EQ(Nodes[0].Children[1].Content, "bar");
+  EXPECT_EQ(Nodes[0].Children[2].Content, "baz");
+}
+
+// Text, a fenced block and a list in one paragraph keep their order.
+TEST(MarkdownParserTest, MixedContent) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena);
+  ASSERT_EQ(Nodes.size(), 3u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text);
+  EXPECT_EQ(Nodes[1].Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(Nodes[2].Kind, NodeKind::NK_UnorderedList);
+}
+
+} // namespace
\ No newline at end of file
>From fb907ac64844b5aa7c0679e32884438dc454949a Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 29 May 2026 16:21:43 -0400
Subject: [PATCH 07/13] [clang-doc] Add design documentation to Markdown.h
---
clang-tools-extra/clang-doc/support/Markdown.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h
index e665170473601..0ae33e33e7eba 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.h
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -11,6 +11,22 @@
/// ecosystem. The parser takes plain text and returns a tree of typed nodes
/// with no knowledge of comments, Doxygen, or Clang-Doc internals.
///
+/// This is a simple Markdown parser for use inside Clang-Doc's comment
+/// pipeline. You give it a paragraph of text and an arena allocator, and it
+/// gives back a list of typed nodes describing the Markdown structure it found.
+///
+/// The main entry point is parseMarkdown(). If the text is empty or only
+/// whitespace, you get back an empty list. Otherwise you get a tree of MDNode
+/// structs where each node has a kind, optional content (like the language tag
+/// on a code fence), and optional children.
+///
+/// All nodes are allocated in the arena you pass in. You own the arena and are
+/// responsible for keeping it alive as long as you use the nodes.
+///
+/// The parser handles fenced code blocks, pipe tables, and unordered lists.
+/// Anything it does not recognize comes back as a plain text node. It will
+/// never crash on bad input.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
>From 5ea6a4ee7b6f41862fbc78b4a348baae32b597d9 Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 00:50:59 -0400
Subject: [PATCH 08/13] [clang-doc] Integrate Markdown parser into MDGenerator
for fenced code blocks
---
clang-tools-extra/clang-doc/MDGenerator.cpp | 87 ++++++++++++++++++++-
1 file changed, 83 insertions(+), 4 deletions(-)
diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp
index df1ca6b868d43..0a01ec5aeba2d 100644
--- a/clang-tools-extra/clang-doc/MDGenerator.cpp
+++ b/clang-tools-extra/clang-doc/MDGenerator.cpp
@@ -8,14 +8,19 @@
#include "Generators.h"
#include "Representation.h"
+#include "support/Markdown.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
+#define DEBUG_TYPE "clang-doc-md-generator"
+
using namespace llvm;
namespace clang {
@@ -54,12 +59,10 @@ static void writeHeader(const Twine &Text, unsigned int Num, raw_ostream &OS) {
static void writeSourceFileRef(const ClangDocContext &CDCtx, const Location &L,
raw_ostream &OS) {
-
if (!CDCtx.RepositoryUrl) {
OS << "*Defined at " << L.Filename << "#"
<< std::to_string(L.StartLineNumber) << "*";
} else {
-
OS << formatv("*Defined at [#{0}{1}{2}](#{0}{1}{3})*",
CDCtx.RepositoryLinePrefix.value_or(""), L.StartLineNumber,
L.Filename, *CDCtx.RepositoryUrl);
@@ -148,6 +151,40 @@ static void maybeWriteSourceFileRef(llvm::raw_ostream &OS,
writeSourceFileRef(CDCtx, *DefLoc, OS);
}
+/// Writes a single parsed Markdown node to the output stream.
+///
+/// Fenced code blocks, pipe tables and unordered lists are re-emitted as
+/// Markdown with one child per line. Text nodes are written verbatim. Any
+/// other node kind falls back to emitting its content followed by its
+/// children so that unrecognised constructs never produce empty output.
+static void writeMDNode(const markdown::MDNode &Node, raw_ostream &OS) {
+  switch (Node.Kind) {
+  case markdown::NodeKind::NK_FencedCode:
+    // Emit a Markdown fenced code block, preserving the language tag if one
+    // was present on the opening fence line.
+    OS << "\n```";
+    if (!Node.Content.empty())
+      OS << Node.Content;
+    OS << "\n";
+    for (const auto &Line : Node.Children)
+      OS << Line.Content << "\n";
+    OS << "```\n\n";
+    break;
+
+  case markdown::NodeKind::NK_Table:
+    // Each child holds one pipe-delimited row (the separator row included).
+    // Rows have to stay on their own lines to remain a table.
+    OS << "\n";
+    for (const auto &Row : Node.Children)
+      OS << Row.Content << "\n";
+    OS << "\n";
+    break;
+
+  case markdown::NodeKind::NK_UnorderedList:
+    // The parser strips the bullet marker from each item, so put one back.
+    OS << "\n";
+    for (const auto &Item : Node.Children)
+      OS << "- " << Item.Content << "\n";
+    OS << "\n";
+    break;
+
+  case markdown::NodeKind::NK_Text:
+    OS << Node.Content;
+    break;
+
+  default:
+    // Unhandled node kinds: emit content and recurse into children so
+    // nothing is silently dropped.
+    if (!Node.Content.empty())
+      OS << Node.Content;
+    for (const auto &Child : Node.Children)
+      writeMDNode(Child, OS);
+    break;
+  }
+}
+
static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
switch (I.Kind) {
case CommentKind::CK_FullComment:
@@ -155,11 +192,53 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
writeDescription(Child, OS);
break;
- case CommentKind::CK_ParagraphComment:
+ case CommentKind::CK_ParagraphComment: {
+ // Clang's comment parser represents each line of a documentation
+ // paragraph as a separate CK_TextComment child. To parse Markdown
+ // constructs that span multiple lines (e.g. fenced code blocks),
+ // we concatenate all text-only children and attempt to parse the
+ // result as Markdown before falling back to plain-text emission.
+ //
+ // If the paragraph contains non-text children (inline commands,
+ // HTML tags, etc.) we skip Markdown parsing entirely and fall back
+ // to the original recursive approach so existing behaviour is
+ // preserved.
+ bool AllTextChildren = true;
+ for (const auto &Child : I.Children)
+ if (Child.Kind != CommentKind::CK_TextComment) {
+ AllTextChildren = false;
+ break;
+ }
+
+ if (AllTextChildren && !I.Children.empty()) {
+ std::string ParagraphText;
+ llvm::raw_string_ostream TextOS(ParagraphText);
+ for (const auto &Child : I.Children)
+ if (!Child.Text.empty())
+ TextOS << Child.Text << "\n";
+
+ // The allocator is scoped to this paragraph; nodes must not outlive it.
+ llvm::BumpPtrAllocator Arena;
+ auto Nodes = markdown::parseMarkdown(ParagraphText, Arena);
+
+ LLVM_DEBUG(llvm::dbgs()
+ << "[clang-doc] paragraph -> " << Nodes.size()
+ << " Markdown node(s)\n");
+
+ if (!Nodes.empty()) {
+ for (const auto &Node : Nodes)
+ writeMDNode(Node, OS);
+ writeNewLine(OS);
+ break;
+ }
+ }
+
+ // Fall back: emit children recursively (original behaviour).
for (const auto &Child : I.Children)
writeDescription(Child, OS);
writeNewLine(OS);
break;
+ }
case CommentKind::CK_BlockCommandComment:
OS << genEmphasis(I.Name) << " ";
@@ -608,4 +687,4 @@ static GeneratorRegistry::Add<MDGenerator> MD(MDGenerator::Format,
volatile int MDGeneratorAnchorSource = 0;
} // namespace doc
-} // namespace clang
+} // namespace clang
\ No newline at end of file
>From c950a96be144926daba5bb1a88104eb7d2bd651e Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 01:08:58 -0400
Subject: [PATCH 09/13] [clang-doc] Add FileCheck test for Markdown fenced code
block integration
---
.../test/clang-doc/markdown-fenced-code.cpp | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp
diff --git a/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp b/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp
new file mode 100644
index 0000000000000..a55b73b3a569e
--- /dev/null
+++ b/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp
@@ -0,0 +1,15 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: clang-doc --format=md --doxygen --output=%t --executor=standalone %s 2>&1 || true
+// RUN: FileCheck %s < %t/GlobalNamespace/index.md
+
+/// A function with a fenced code block in its documentation.
+/// Example usage:
+/// ```cpp
+/// int x = documented();
+/// ```
+int documented();
+
+// CHECK: ### documented
+// CHECK: ```cpp
+// CHECK: int x = documented();
+// CHECK: ```
\ No newline at end of file
>From ddf1f20bb0af4ee6494cd3d5e53be965a5802430 Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 01:20:15 -0400
Subject: [PATCH 10/13] [clang-doc] Add LLVM_DEBUG macros to Markdown parser
---
.../clang-doc/support/Markdown.cpp | 31 +++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
index bbce53fa17156..49c141bc2baed 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -10,6 +10,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "clang-doc-markdown"
namespace clang {
namespace doc {
@@ -38,12 +42,17 @@ allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
llvm::BumpPtrAllocator &Arena) {
- if (ParagraphText.trim().empty())
+ if (ParagraphText.trim().empty()) {
+ LLVM_DEBUG(llvm::dbgs() << "[md] empty input, returning nothing\n");
return {};
+ }
llvm::SmallVector<llvm::StringRef, 16> Lines;
ParagraphText.split(Lines, '\n');
+ LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size()
+ << " line(s)\n");
+
llvm::SmallVector<MDNode, 8> Nodes;
unsigned I = 0;
@@ -59,14 +68,19 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
if (Line.starts_with("```") || Line.starts_with("~~~")) {
char Fence = Line[0];
llvm::StringRef Lang = Line.drop_front(3).trim();
+ LLVM_DEBUG(llvm::dbgs() << "[md] fenced code block, lang='"
+ << Lang << "'\n");
llvm::SmallVector<MDNode, 4> CodeLines;
++I;
while (I < Lines.size()) {
llvm::StringRef CodeLine = Lines[I].trim();
if (CodeLine.size() >= 3 &&
llvm::all_of(CodeLine.take_front(3),
- [Fence](char C) { return C == Fence; }))
+ [Fence](char C) { return C == Fence; })) {
+ LLVM_DEBUG(llvm::dbgs() << "[md] closing fence found at line "
+ << I << "\n");
break;
+ }
CodeLines.push_back(makeText(Lines[I]));
++I;
}
@@ -75,6 +89,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
Code.Kind = NodeKind::NK_FencedCode;
Code.Content = Lang;
Code.Children = allocateNodes(CodeLines, Arena);
+ LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_FencedCode with "
+ << CodeLines.size() << " line(s)\n");
Nodes.push_back(Code);
continue;
}
@@ -82,6 +98,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
// Pipe table: current line has | and next line is a separator row
if (Line.contains('|') && I + 1 < Lines.size() &&
isSepRow(Lines[I + 1].trim())) {
+ LLVM_DEBUG(llvm::dbgs() << "[md] pipe table detected at line "
+ << I << "\n");
llvm::SmallVector<MDNode, 4> Rows;
while (I < Lines.size() && Lines[I].trim().contains('|')) {
Rows.push_back(makeText(Lines[I].trim()));
@@ -91,6 +109,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
Table.Kind = NodeKind::NK_Table;
Table.Content = {};
Table.Children = allocateNodes(Rows, Arena);
+ LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with "
+ << Rows.size() << " row(s)\n");
Nodes.push_back(Table);
continue;
}
@@ -98,6 +118,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
// Unordered list item
if (Line.starts_with("- ") || Line.starts_with("* ") ||
Line.starts_with("+ ")) {
+ LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I
+ << "\n");
llvm::SmallVector<MDNode, 4> Items;
while (I < Lines.size()) {
llvm::StringRef L = Lines[I].trim();
@@ -115,15 +137,20 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
List.Kind = NodeKind::NK_UnorderedList;
List.Content = {};
List.Children = allocateNodes(Items, Arena);
+ LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_UnorderedList with "
+ << Items.size() << " item(s)\n");
Nodes.push_back(List);
continue;
}
// Plain text fallback
+ LLVM_DEBUG(llvm::dbgs() << "[md] plain text: '" << Line << "'\n");
Nodes.push_back(makeText(Line));
++I;
}
+ LLVM_DEBUG(llvm::dbgs() << "[md] done, " << Nodes.size()
+ << " top-level node(s)\n");
return allocateNodes(Nodes, Arena);
}
>From 55b0e43e371a619ad74c7253d8f01236787f88b7 Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 01:30:32 -0400
Subject: [PATCH 11/13] [clang-doc] Fix clang-format violations in LLVM_DEBUG
macros
---
clang-tools-extra/clang-doc/MDGenerator.cpp | 5 ++--
.../clang-doc/support/Markdown.cpp | 28 +++++++++----------
2 files changed, 15 insertions(+), 18 deletions(-)
diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp
index 0a01ec5aeba2d..80a43970d4d05 100644
--- a/clang-tools-extra/clang-doc/MDGenerator.cpp
+++ b/clang-tools-extra/clang-doc/MDGenerator.cpp
@@ -221,9 +221,8 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
llvm::BumpPtrAllocator Arena;
auto Nodes = markdown::parseMarkdown(ParagraphText, Arena);
- LLVM_DEBUG(llvm::dbgs()
- << "[clang-doc] paragraph -> " << Nodes.size()
- << " Markdown node(s)\n");
+ LLVM_DEBUG(llvm::dbgs() << "[clang-doc] paragraph -> " << Nodes.size()
+ << " Markdown node(s)\n");
if (!Nodes.empty()) {
for (const auto &Node : Nodes)
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
index 49c141bc2baed..3c425d89527c1 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -50,8 +50,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
llvm::SmallVector<llvm::StringRef, 16> Lines;
ParagraphText.split(Lines, '\n');
- LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size()
- << " line(s)\n");
+ LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size() << " line(s)\n");
llvm::SmallVector<MDNode, 8> Nodes;
unsigned I = 0;
@@ -68,8 +67,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
if (Line.starts_with("```") || Line.starts_with("~~~")) {
char Fence = Line[0];
llvm::StringRef Lang = Line.drop_front(3).trim();
- LLVM_DEBUG(llvm::dbgs() << "[md] fenced code block, lang='"
- << Lang << "'\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "[md] fenced code block, lang='" << Lang << "'\n");
llvm::SmallVector<MDNode, 4> CodeLines;
++I;
while (I < Lines.size()) {
@@ -77,8 +76,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
if (CodeLine.size() >= 3 &&
llvm::all_of(CodeLine.take_front(3),
[Fence](char C) { return C == Fence; })) {
- LLVM_DEBUG(llvm::dbgs() << "[md] closing fence found at line "
- << I << "\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "[md] closing fence found at line " << I << "\n");
break;
}
CodeLines.push_back(makeText(Lines[I]));
@@ -90,7 +89,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
Code.Content = Lang;
Code.Children = allocateNodes(CodeLines, Arena);
LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_FencedCode with "
- << CodeLines.size() << " line(s)\n");
+ << CodeLines.size() << " line(s)\n");
Nodes.push_back(Code);
continue;
}
@@ -98,8 +97,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
// Pipe table: current line has | and next line is a separator row
if (Line.contains('|') && I + 1 < Lines.size() &&
isSepRow(Lines[I + 1].trim())) {
- LLVM_DEBUG(llvm::dbgs() << "[md] pipe table detected at line "
- << I << "\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "[md] pipe table detected at line " << I << "\n");
llvm::SmallVector<MDNode, 4> Rows;
while (I < Lines.size() && Lines[I].trim().contains('|')) {
Rows.push_back(makeText(Lines[I].trim()));
@@ -109,8 +108,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
Table.Kind = NodeKind::NK_Table;
Table.Content = {};
Table.Children = allocateNodes(Rows, Arena);
- LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with "
- << Rows.size() << " row(s)\n");
+ LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with " << Rows.size()
+ << " row(s)\n");
Nodes.push_back(Table);
continue;
}
@@ -118,8 +117,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
// Unordered list item
if (Line.starts_with("- ") || Line.starts_with("* ") ||
Line.starts_with("+ ")) {
- LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I
- << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I << "\n");
llvm::SmallVector<MDNode, 4> Items;
while (I < Lines.size()) {
llvm::StringRef L = Lines[I].trim();
@@ -138,7 +136,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
List.Content = {};
List.Children = allocateNodes(Items, Arena);
LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_UnorderedList with "
- << Items.size() << " item(s)\n");
+ << Items.size() << " item(s)\n");
Nodes.push_back(List);
continue;
}
@@ -150,7 +148,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
}
LLVM_DEBUG(llvm::dbgs() << "[md] done, " << Nodes.size()
- << " top-level node(s)\n");
+ << " top-level node(s)\n");
return allocateNodes(Nodes, Arena);
}
>From ece5dd18712cc7e974a4e2d6cae1bc5edd5a3382 Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 02:14:26 -0400
Subject: [PATCH 12/13] [clang-doc] Switch to LDBG() for debug output
---
clang-tools-extra/clang-doc/MDGenerator.cpp | 8 ++
clang-tools-extra/clang-doc/Markdown.cpp | 133 --------------------
clang-tools-extra/clang-doc/Markdown.h | 59 ---------
3 files changed, 8 insertions(+), 192 deletions(-)
delete mode 100644 clang-tools-extra/clang-doc/Markdown.cpp
delete mode 100644 clang-tools-extra/clang-doc/Markdown.h
diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp
index 80a43970d4d05..a8ba954fa063d 100644
--- a/clang-tools-extra/clang-doc/MDGenerator.cpp
+++ b/clang-tools-extra/clang-doc/MDGenerator.cpp
@@ -12,7 +12,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
+<<<<<<< Updated upstream
#include "llvm/Support/Debug.h"
+=======
+#include "llvm/Support/DebugLog.h"
+>>>>>>> Stashed changes
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
@@ -221,8 +225,12 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
llvm::BumpPtrAllocator Arena;
auto Nodes = markdown::parseMarkdown(ParagraphText, Arena);
+<<<<<<< Updated upstream
LLVM_DEBUG(llvm::dbgs() << "[clang-doc] paragraph -> " << Nodes.size()
<< " Markdown node(s)\n");
+=======
+ LDBG() << "paragraph -> " << Nodes.size() << " Markdown node(s)";
+>>>>>>> Stashed changes
if (!Nodes.empty()) {
for (const auto &Node : Nodes)
diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp
deleted file mode 100644
index 17ee61369fb6b..0000000000000
--- a/clang-tools-extra/clang-doc/Markdown.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "Markdown.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Allocator.h"
-
-namespace clang {
-namespace doc {
-namespace markdown {
-
-static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
-
-// A line is a table separator if it only contains |, -, :, and spaces,
-// and has at least one -.
-static bool isSepRow(llvm::StringRef Line) {
- return llvm::all_of(Line,
- [](char C) {
- return C == '|' || C == '-' || C == ':' || C == ' ';
- }) &&
- Line.contains('-');
-}
-
-static llvm::ArrayRef<MDNode>
-allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
- llvm::BumpPtrAllocator &Arena) {
- if (Nodes.empty())
- return {};
- MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size());
- std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated);
- return llvm::ArrayRef<MDNode>(Allocated, Nodes.size());
-}
-
-llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
- llvm::BumpPtrAllocator &Arena) {
- if (ParagraphText.trim().empty())
- return {};
-
- llvm::SmallVector<llvm::StringRef, 16> Lines;
- ParagraphText.split(Lines, '\n');
-
- llvm::SmallVector<MDNode, 8> Nodes;
- unsigned I = 0;
-
- while (I < Lines.size()) {
- llvm::StringRef Line = Lines[I].trim();
-
- if (Line.empty()) {
- ++I;
- continue;
- }
-
- // Fenced code block: ``` or ~~~
- if (Line.starts_with("```") || Line.starts_with("~~~")) {
- char Fence = Line[0];
- llvm::StringRef Lang = Line.drop_front(3).trim();
- llvm::SmallVector<MDNode, 4> CodeLines;
- ++I;
- while (I < Lines.size()) {
- llvm::StringRef CodeLine = Lines[I].trim();
- if (CodeLine.size() >= 3 &&
- llvm::all_of(CodeLine.take_front(3),
- [Fence](char C) { return C == Fence; }))
- break;
- CodeLines.push_back(makeText(Lines[I]));
- ++I;
- }
- ++I; // skip closing fence
- MDNode Code;
- Code.Kind = NodeKind::FencedCode;
- Code.Content = Lang;
- Code.Children = allocateNodes(CodeLines, Arena);
- Nodes.push_back(Code);
- continue;
- }
-
- // Pipe table: current line has | and next line is a separator row
- if (Line.contains('|') && I + 1 < Lines.size() &&
- isSepRow(Lines[I + 1].trim())) {
- llvm::SmallVector<MDNode, 4> Rows;
- while (I < Lines.size() && Lines[I].trim().contains('|')) {
- Rows.push_back(makeText(Lines[I].trim()));
- ++I;
- }
- MDNode Table;
- Table.Kind = NodeKind::Table;
- Table.Content = {};
- Table.Children = allocateNodes(Rows, Arena);
- Nodes.push_back(Table);
- continue;
- }
-
- // Unordered list item
- if (Line.starts_with("- ") || Line.starts_with("* ") ||
- Line.starts_with("+ ")) {
- llvm::SmallVector<MDNode, 4> Items;
- while (I < Lines.size()) {
- llvm::StringRef L = Lines[I].trim();
- if (!L.starts_with("- ") && !L.starts_with("* ") &&
- !L.starts_with("+ "))
- break;
- MDNode Item;
- Item.Kind = NodeKind::ListItem;
- Item.Content = L.drop_front(2).trim();
- Item.Children = {};
- Items.push_back(Item);
- ++I;
- }
- MDNode List;
- List.Kind = NodeKind::UnorderedList;
- List.Content = {};
- List.Children = allocateNodes(Items, Arena);
- Nodes.push_back(List);
- continue;
- }
-
- // Plain text fallback
- Nodes.push_back(makeText(Line));
- ++I;
- }
-
- return allocateNodes(Nodes, Arena);
-}
-
-} // namespace markdown
-} // namespace doc
-} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h
deleted file mode 100644
index bf4815e068b53..0000000000000
--- a/clang-tools-extra/clang-doc/Markdown.h
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a standalone Markdown parsing library for the LLVM
-// ecosystem. The parser takes plain text and returns a tree of typed nodes
-// with no knowledge of comments, Doxygen, or Clang-Doc internals.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Allocator.h"
-
-namespace clang {
-namespace doc {
-namespace markdown {
-
-enum class NodeKind : uint8_t {
- // Block nodes
- Paragraph,
- FencedCode,
- Table,
- UnorderedList,
- OrderedList,
- ListItem,
- ThematicBreak,
- // Inline nodes
- Text,
- InlineCode,
- Emphasis,
- Strong,
- SoftBreak,
-};
-
-struct MDNode {
- NodeKind Kind;
- llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
- llvm::ArrayRef<MDNode> Children; // arena allocated
-};
-
-// Parses Markdown from a single comment paragraph's text.
-// Returns an empty ArrayRef if no Markdown constructs are found,
-// so generators can fall back to plain-text rendering at zero cost.
-llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
- llvm::BumpPtrAllocator &Arena);
-
-} // namespace markdown
-} // namespace doc
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file
>From 139e6d170588a96d9bc213402a7cfbfdad2af58d Mon Sep 17 00:00:00 2001
From: Neil-N4 <neilnair4 at gmail.com>
Date: Fri, 5 Jun 2026 02:24:21 -0400
Subject: [PATCH 13/13] [clang-doc] Switch to LDBG() for debug output
---
clang-tools-extra/clang-doc/MDGenerator.cpp | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp
index a8ba954fa063d..e5648cbc7a82f 100644
--- a/clang-tools-extra/clang-doc/MDGenerator.cpp
+++ b/clang-tools-extra/clang-doc/MDGenerator.cpp
@@ -12,11 +12,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
-<<<<<<< Updated upstream
-#include "llvm/Support/Debug.h"
-=======
#include "llvm/Support/DebugLog.h"
->>>>>>> Stashed changes
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
@@ -225,12 +221,7 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
llvm::BumpPtrAllocator Arena;
auto Nodes = markdown::parseMarkdown(ParagraphText, Arena);
-<<<<<<< Updated upstream
- LLVM_DEBUG(llvm::dbgs() << "[clang-doc] paragraph -> " << Nodes.size()
- << " Markdown node(s)\n");
-=======
LDBG() << "paragraph -> " << Nodes.size() << " Markdown node(s)";
->>>>>>> Stashed changes
if (!Nodes.empty()) {
for (const auto &Node : Nodes)
More information about the cfe-commits
mailing list