[clang-tools-extra] [clang-doc] add Markdown parser (PR #155887)

Paul Kirth via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 29 13:55:08 PDT 2025


================
@@ -0,0 +1,229 @@
+#include "MDParser.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/AllocatorList.h"
+
+namespace clang {
+namespace doc {
+namespace {
+/// Returns true when \p Token is a character that may form an emphasis
+/// delimiter run. Only '*' is recognised at present.
+///
+/// The character is taken by value: it is never modified, and a non-const
+/// reference parameter would reject const characters and temporaries.
+bool isEmphasisDelimiter(char Token) {
+  // TODO: support '_'
+  return Token == '*';
+}
+} // namespace
+
+/// Measures the run of identical characters that starts at \p Origin in
+/// \p Line and classifies it as left- and/or right-flanking from the
+/// characters on either side of the run.
+///
+/// \returns a DelimiterContext paired with the index one past the end of the
+/// run when the run is left- or right-flanking; otherwise {std::nullopt, 0}.
+std::pair<std::optional<DelimiterContext>, size_t>
+MarkdownParser::processDelimiters(SmallString<64> &Line, const size_t &Origin) {
+  // Walk forward over every character equal to the one at Origin.
+  size_t RunEnd = Origin;
+  for (; RunEnd < Line.size() && Line[RunEnd] == Line[Origin]; ++RunEnd)
+    ;
+
+  // The start and end of the line are treated as a space.
+  const char Before = Origin == 0 ? ' ' : Line[Origin - 1];
+  const char After = RunEnd < Line.size() ? Line[RunEnd] : ' ';
+
+  const bool SpaceBefore = isWhitespace(Before);
+  const bool SpaceAfter = isWhitespace(After);
+  const bool PunctBefore = isPunctuation(Before);
+  const bool PunctAfter = isPunctuation(After);
+
+  // Left-flanking: not followed by whitespace, and either not followed by
+  // punctuation or preceded by whitespace/punctuation. Right-flanking is the
+  // mirror image.
+  const bool IsLeft = !SpaceAfter && (!PunctAfter || SpaceBefore || PunctBefore);
+  const bool IsRight =
+      !SpaceBefore && (!PunctBefore || SpaceAfter || PunctAfter);
+
+  if (!IsLeft && !IsRight)
+    return {std::nullopt, 0};
+
+  // The third and fourth initializers (presumably the can-open / can-close
+  // flags -- confirm against DelimiterContext) always mirror the flanking
+  // flags, so one return covers every remaining combination.
+  return {DelimiterContext{IsLeft, IsRight, IsLeft, IsRight}, RunEnd};
+}
+
+/// Allocates a new node of type MDType::Text in the parser's arena and fills
+/// its content by appending the content of each entry of \p Text in list
+/// order.
+Node *MarkdownParser::createTextNode(const std::list<LineNode *> &Text) {
+  Node *Result = new (Arena) Node();
+  Result->Type = MDType::Text;
+  for (const LineNode *Piece : Text)
+    Result->Content.append(Piece->Content);
+  return Result;
+}
+
+Node *MarkdownParser::reverseIterateLine(std::list<LineNode *> &Stack,
+                                         std::list<LineNode *>::iterator &It) {
+  auto ReverseIt = std::make_reverse_iterator(It);
+  std::list<LineNode *> Text;
+  while (ReverseIt != Stack.rend()) {
+    auto *ReverseCurrent = *ReverseIt;
+    if (!ReverseCurrent->DelimiterContext && !ReverseCurrent->Content.empty()) {
+      Text.push_back(ReverseCurrent);
+      ReverseIt++;
+      continue;
+    }
+
+    if (ReverseCurrent->DelimiterContext &&
+        ReverseCurrent->DelimiterContext->CanOpen) {
+      if (Text.empty()) {
+        // If there is no text between the runs, there is no emphasis, so both
+        // delimiter runs are literal text.
+        auto *DelimiterTextNode = new (Arena) Node();
+        DelimiterTextNode->Content =
+            Saver.save((*It)->Content + ReverseCurrent->Content);
+        DelimiterTextNode->Type = MDType::Text;
+        return DelimiterTextNode;
+      }
+      Node *Emphasis = nullptr;
+
+      auto &Closer = (*It)->DelimiterContext;
+      auto &Opener = ReverseCurrent->DelimiterContext;
+
+      if (Closer->Length >= 2 && Opener->Length >= 2) {
+        // We have at least one strong node.
+        Closer->Length -= 2;
+        Opener->Length -= 2;
+        Emphasis = new (Arena) Node();
+        Emphasis->Type = MDType::Strong;
+        auto *Child = createTextNode(Text);
+        Child->Parent = Emphasis;
+        Emphasis->Children.push_back(Child);
+      } else if (Closer->Length == 1 && Opener->Length == 1) {
+        Closer->Length -= 1;
+        Opener->Length -= 1;
+        Emphasis = new (Arena) Node();
+        Emphasis->Type = MDType::Emphasis;
+        auto *Child = createTextNode(Text);
+        Child->Parent = Emphasis;
+        Emphasis->Children.push_back(Child);
----------------
ilovepi wrote:

The only differences between these blocks seem to be the MDType and the length adjustment. Maybe just use a helper? Or set only those bits separately, to avoid repeating mostly the same code?

https://github.com/llvm/llvm-project/pull/155887


More information about the cfe-commits mailing list