[Mlir-commits] [mlir] [MLIR] Make generated markdown doc more consistent (PR #119926)

Kevin Gleason llvmlistbot at llvm.org
Fri Dec 13 13:12:05 PST 2024


https://github.com/GleasonK created https://github.com/llvm/llvm-project/pull/119926

A few changes to doc generation:

- All summaries are in italics.
- In general each optional block starts and ends with a newline.
- All table elements are enclosed in `|`'s
- Overall reduce the number of >2newlines in a row

Rationale for this change is that our markdown to docs generator requires a newline before all headers, otherwise it gets inlined into the line before it, see `### sdy-op-priority-propagate` in the image below.

<img width="883" alt="image" src="https://github.com/user-attachments/assets/b795c424-cecb-48df-abbe-aee2030f4491" />

That said overall I feel this formatting is more consistent now, here's a before and after:

- Dialect documentation diff: https://www.diffchecker.com/OVMHoXeL/
- Pass documentation diff: https://www.diffchecker.com/XEJRmW3k/

>From 2f027e4abe1fa783e9a81efc97dd95d7ea3b85e6 Mon Sep 17 00:00:00 2001
From: Kevin Gleason <gleasonk at google.com>
Date: Fri, 13 Dec 2024 18:59:45 +0000
Subject: [PATCH 1/2] [MLIR] Make generated markdown doc more consistent

---
 mlir/test/mlir-tblgen/gen-dialect-doc.td   |  2 +-
 mlir/tools/mlir-tblgen/OpDocGen.cpp        | 92 +++++++++++-----------
 mlir/tools/mlir-tblgen/OpInterfacesGen.cpp | 10 +--
 mlir/tools/mlir-tblgen/PassDocGen.cpp      |  6 +-
 4 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/mlir/test/mlir-tblgen/gen-dialect-doc.td b/mlir/test/mlir-tblgen/gen-dialect-doc.td
index 79d755111e8f67..eb462fc6510288 100644
--- a/mlir/test/mlir-tblgen/gen-dialect-doc.td
+++ b/mlir/test/mlir-tblgen/gen-dialect-doc.td
@@ -122,7 +122,7 @@ def TestEnum :
 
 // CHECK: ## Enums
 // CHECK: ### TestEnum
-// CHECK: enum summary
+// CHECK: _Enum summary_
 // CHECK: #### Cases:
 // CHECK: | Symbol | Value | String |
 // CHECK: | :----: | :---: | ------ |
diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp
index d499c78a5cf44d..a6e92209c9c57c 100644
--- a/mlir/tools/mlir-tblgen/OpDocGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp
@@ -54,12 +54,12 @@ cl::opt<bool> allowHugoSpecificFeatures(
     cl::cat(docCat));
 
 void mlir::tblgen::emitSummary(StringRef summary, raw_ostream &os) {
-  if (!summary.empty()) {
-    StringRef trimmed = summary.trim();
-    char first = std::toupper(trimmed.front());
-    StringRef rest = trimmed.drop_front();
-    os << "\n_" << first << rest << "_\n\n";
-  }
+  if (summary.empty())
+    return;
+  StringRef trimmed = summary.trim();
+  char first = std::toupper(trimmed.front());
+  StringRef rest = trimmed.drop_front();
+  os << "\n_" << first << rest << "_\n";
 }
 
 // Emit the description by aligning the text to the left per line (e.g.,
@@ -69,6 +69,9 @@ void mlir::tblgen::emitSummary(StringRef summary, raw_ostream &os) {
 // in a way the user wanted but has some additional indenting due to being
 // nested in the op definition.
 void mlir::tblgen::emitDescription(StringRef description, raw_ostream &os) {
+  if (description.empty())
+    return;
+  os << "\n";
   raw_indented_ostream ros(os);
   StringRef trimmed = description.rtrim(" \t");
   ros.printReindented(trimmed);
@@ -80,6 +83,7 @@ void mlir::tblgen::emitDescriptionComment(StringRef description,
                                           raw_ostream &os, StringRef prefix) {
   if (description.empty())
     return;
+  os << "\n";
   raw_indented_ostream ros(os);
   StringRef trimmed = description.rtrim(" \t");
   ros.printReindented(trimmed, (Twine(prefix) + "/// ").str());
@@ -87,22 +91,14 @@ void mlir::tblgen::emitDescriptionComment(StringRef description,
     ros << "\n";
 }
 
-// Emits `str` with trailing newline if not empty.
-static void emitIfNotEmpty(StringRef str, raw_ostream &os) {
-  if (!str.empty()) {
-    emitDescription(str, os);
-    os << "\n";
-  }
-}
-
 /// Emit the given named constraint.
 template <typename T>
 static void emitNamedConstraint(const T &it, raw_ostream &os) {
   if (!it.name.empty())
     os << "| `" << it.name << "`";
   else
-    os << "«unnamed»";
-  os << " | " << it.constraint.getSummary() << "\n";
+    os << "| «unnamed»";
+  os << " | " << it.constraint.getSummary() << " |\n";
 }
 
 //===----------------------------------------------------------------------===//
@@ -112,6 +108,8 @@ static void emitNamedConstraint(const T &it, raw_ostream &os) {
 /// Emit the assembly format of an operation.
 static void emitAssemblyFormat(StringRef opName, StringRef format,
                                raw_ostream &os) {
+  if (format.empty())
+    return;
   os << "\nSyntax:\n\n```\noperation ::= `" << opName << "` ";
 
   // Print the assembly format aligned.
@@ -124,7 +122,7 @@ static void emitAssemblyFormat(StringRef opName, StringRef format,
     if (!formatChunk.empty())
       os.indent(indent) << formatChunk << "\n";
   } while (!split.second.empty());
-  os << "```\n\n";
+  os << "```\n";
 }
 
 /// Place `text` between backticks so that the Markdown processor renders it as
@@ -199,7 +197,7 @@ static void emitOpDoc(const Operator &op, raw_ostream &os) {
   std::string classNameStr = op.getQualCppClassName();
   StringRef className = classNameStr;
   (void)className.consume_front(stripPrefix);
-  os << formatv("### `{0}` ({1})\n", op.getOperationName(), className);
+  os << formatv("\n### `{0}` ({1})\n", op.getOperationName(), className);
 
   // Emit the summary, syntax, and description if present.
   if (op.hasSummary())
@@ -281,8 +279,8 @@ static void emitSourceLink(StringRef inputFilename, raw_ostream &os) {
 
   StringRef inputFromMlirInclude = inputFilename.substr(pathBegin);
 
-  os << "[source](https://github.com/llvm/llvm-project/blob/main/"
-     << inputFromMlirInclude << ")\n\n";
+  os << "\n[source](https://github.com/llvm/llvm-project/blob/main/"
+     << inputFromMlirInclude << ")\n";
 }
 
 static void emitOpDoc(const RecordKeeper &records, raw_ostream &os) {
@@ -299,9 +297,9 @@ static void emitOpDoc(const RecordKeeper &records, raw_ostream &os) {
 //===----------------------------------------------------------------------===//
 
 static void emitAttrDoc(const Attribute &attr, raw_ostream &os) {
-  os << "### " << attr.getSummary() << "\n\n";
+  os << "\n### " << attr.getSummary() << "\n";
   emitDescription(attr.getDescription(), os);
-  os << "\n\n";
+  os << "\n";
 }
 
 //===----------------------------------------------------------------------===//
@@ -309,9 +307,9 @@ static void emitAttrDoc(const Attribute &attr, raw_ostream &os) {
 //===----------------------------------------------------------------------===//
 
 static void emitTypeDoc(const Type &type, raw_ostream &os) {
-  os << "### " << type.getSummary() << "\n\n";
+  os << "\n### " << type.getSummary() << "\n";
   emitDescription(type.getDescription(), os);
-  os << "\n\n";
+  os << "\n";
 }
 
 //===----------------------------------------------------------------------===//
@@ -342,11 +340,11 @@ static void emitAttrOrTypeDefAssemblyFormat(const AttrOrTypeDef &def,
 }
 
 static void emitAttrOrTypeDefDoc(const AttrOrTypeDef &def, raw_ostream &os) {
-  os << formatv("### {0}\n", def.getCppClassName());
+  os << formatv("\n### {0}\n", def.getCppClassName());
 
   // Emit the summary if present.
   if (def.hasSummary())
-    os << "\n" << def.getSummary() << "\n";
+    emitSummary(def.getSummary(), os);
 
   // Emit the syntax if present.
   if (def.getMnemonic() && !def.hasCustomAssemblyFormat())
@@ -354,7 +352,6 @@ static void emitAttrOrTypeDefDoc(const AttrOrTypeDef &def, raw_ostream &os) {
 
   // Emit the description if present.
   if (def.hasDescription()) {
-    os << "\n";
     mlir::tblgen::emitDescription(def.getDescription(), os);
   }
 
@@ -363,11 +360,11 @@ static void emitAttrOrTypeDefDoc(const AttrOrTypeDef &def, raw_ostream &os) {
   if (!parameters.empty()) {
     os << "\n#### Parameters:\n\n";
     os << "| Parameter | C++ type | Description |\n"
-       << "| :-------: | :-------: | ----------- |\n";
+       << "| :-------: | :-------: | ----------- |";
     for (const auto &it : parameters) {
       auto desc = it.getSummary();
-      os << "| " << it.getName() << " | `" << it.getCppType() << "` | "
-         << (desc ? *desc : "") << " |\n";
+      os << "\n| " << it.getName() << " | `" << it.getCppType() << "` | "
+         << (desc ? *desc : "") << " |";
     }
   }
 
@@ -388,20 +385,19 @@ static void emitAttrOrTypeDefDoc(const RecordKeeper &records, raw_ostream &os,
 //===----------------------------------------------------------------------===//
 
 static void emitEnumDoc(const EnumAttr &def, raw_ostream &os) {
-  os << formatv("### {0}\n", def.getEnumClassName());
+  os << formatv("\n### {0}\n", def.getEnumClassName());
 
   // Emit the summary if present.
-  if (!def.getSummary().empty())
-    os << "\n" << def.getSummary() << "\n";
+  emitSummary(def.getSummary(), os);
 
   // Emit case documentation.
   std::vector<EnumAttrCase> cases = def.getAllCases();
   os << "\n#### Cases:\n\n";
   os << "| Symbol | Value | String |\n"
-     << "| :----: | :---: | ------ |\n";
+     << "| :----: | :---: | ------ |";
   for (const auto &it : cases) {
-    os << "| " << it.getSymbol() << " | `" << it.getValue() << "` | "
-       << it.getStr() << " |\n";
+    os << "\n| " << it.getSymbol() << " | `" << it.getValue() << "` | "
+       << it.getStr() << " |";
   }
 
   os << "\n";
@@ -447,7 +443,7 @@ static void emitBlock(ArrayRef<Attribute> attributes, StringRef inputFilename,
                       ArrayRef<Type> types, ArrayRef<TypeDef> typeDefs,
                       ArrayRef<EnumAttr> enums, raw_ostream &os) {
   if (!ops.empty()) {
-    os << "## Operations\n\n";
+    os << "\n## Operations\n";
     emitSourceLink(inputFilename, os);
     for (const OpDocGroup &grouping : ops) {
       bool nested = !grouping.summary.empty();
@@ -455,9 +451,9 @@ static void emitBlock(ArrayRef<Attribute> attributes, StringRef inputFilename,
           nested,
           [&](raw_ostream &os) {
             if (nested) {
-              os << "## " << StringRef(grouping.summary).trim() << "\n\n";
+              os << "\n## " << StringRef(grouping.summary).trim() << "\n";
               emitDescription(grouping.description, os);
-              os << "\n\n";
+              os << "\n";
             }
             for (const Operator &op : grouping.ops) {
               emitOpDoc(op, os);
@@ -468,32 +464,32 @@ static void emitBlock(ArrayRef<Attribute> attributes, StringRef inputFilename,
   }
 
   if (!attributes.empty()) {
-    os << "## Attribute constraints\n\n";
+    os << "\n## Attribute constraints\n";
     for (const Attribute &attr : attributes)
       emitAttrDoc(attr, os);
   }
 
   if (!attrDefs.empty()) {
-    os << "## Attributes\n\n";
+    os << "\n## Attributes\n";
     for (const AttrDef &def : attrDefs)
       emitAttrOrTypeDefDoc(def, os);
   }
 
   // TODO: Add link between use and def for types
   if (!types.empty()) {
-    os << "## Type constraints\n\n";
+    os << "\n## Type constraints\n";
     for (const Type &type : types)
       emitTypeDoc(type, os);
   }
 
   if (!typeDefs.empty()) {
-    os << "## Types\n\n";
+    os << "\n## Types\n";
     for (const TypeDef &def : typeDefs)
       emitAttrOrTypeDefDoc(def, os);
   }
 
   if (!enums.empty()) {
-    os << "## Enums\n\n";
+    os << "\n## Enums\n";
     for (const EnumAttr &def : enums)
       emitEnumDoc(def, os);
   }
@@ -504,14 +500,14 @@ static void emitDialectDoc(const Dialect &dialect, StringRef inputFilename,
                            ArrayRef<AttrDef> attrDefs, ArrayRef<OpDocGroup> ops,
                            ArrayRef<Type> types, ArrayRef<TypeDef> typeDefs,
                            ArrayRef<EnumAttr> enums, raw_ostream &os) {
-  os << "# '" << dialect.getName() << "' Dialect\n\n";
-  emitIfNotEmpty(dialect.getSummary(), os);
-  emitIfNotEmpty(dialect.getDescription(), os);
+  os << "\n# '" << dialect.getName() << "' Dialect\n";
+  emitSummary(dialect.getSummary(), os);
+  emitDescription(dialect.getDescription(), os);
 
   // Generate a TOC marker except if description already contains one.
   Regex r("^[[:space:]]*\\[TOC\\]$", Regex::RegexFlags::Newline);
   if (!r.match(dialect.getDescription()))
-    os << "[TOC]\n\n";
+    os << "\n[TOC]\n";
 
   emitBlock(attributes, inputFilename, attrDefs, ops, types, typeDefs, enums,
             os);
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index 1f1b1d9a340391..dcd68e6c2d6366 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -627,8 +627,8 @@ static void emitInterfaceDoc(const Record &interfaceDef, raw_ostream &os) {
   Interface interface(&interfaceDef);
 
   // Emit the interface name followed by the description.
-  os << "## " << interface.getName() << " (`" << interfaceDef.getName()
-     << "`)\n\n";
+  os << "\n## " << interface.getName() << " (`" << interfaceDef.getName()
+     << "`)\n";
   if (auto description = interface.getDescription())
     mlir::tblgen::emitDescription(*description, os);
 
@@ -636,7 +636,7 @@ static void emitInterfaceDoc(const Record &interfaceDef, raw_ostream &os) {
   os << "\n### Methods:\n";
   for (const auto &method : interface.getMethods()) {
     // Emit the method name.
-    os << "#### `" << method.getName() << "`\n\n```c++\n";
+    os << "\n#### `" << method.getName() << "`\n\n```c++\n";
 
     // Emit the method signature.
     if (method.isStatic())
@@ -656,13 +656,13 @@ static void emitInterfaceDoc(const Record &interfaceDef, raw_ostream &os) {
     if (!method.getBody())
       os << "\nNOTE: This method *must* be implemented by the user.";
 
-    os << "\n\n";
+    os << "\n";
   }
 }
 
 bool InterfaceGenerator::emitInterfaceDocs() {
   os << "<!-- Autogenerated by mlir-tblgen; don't manually edit -->\n";
-  os << "# " << interfaceBaseType << " definitions\n";
+  os << "\n# " << interfaceBaseType << " definitions\n";
 
   for (const auto *def : defs)
     emitInterfaceDoc(*def, os);
diff --git a/mlir/tools/mlir-tblgen/PassDocGen.cpp b/mlir/tools/mlir-tblgen/PassDocGen.cpp
index a2cb514ece3eba..456f9ceffeb9b2 100644
--- a/mlir/tools/mlir-tblgen/PassDocGen.cpp
+++ b/mlir/tools/mlir-tblgen/PassDocGen.cpp
@@ -22,14 +22,14 @@ using llvm::RecordKeeper;
 
 /// Emit the documentation for the given pass.
 static void emitDoc(const Pass &pass, raw_ostream &os) {
-  os << llvm::formatv("### `-{0}`\n", pass.getArgument());
+  os << llvm::formatv("\n### `-{0}`\n", pass.getArgument());
   emitSummary(pass.getSummary(), os);
   emitDescription(pass.getDescription(), os);
 
   // Handle the options of the pass.
   ArrayRef<PassOption> options = pass.getOptions();
   if (!options.empty()) {
-    os << "\n#### Options\n```\n";
+    os << "\n#### Options\n\n```\n";
     size_t longestOption = 0;
     for (const PassOption &option : options)
       longestOption = std::max(option.getArgument().size(), longestOption);
@@ -44,7 +44,7 @@ static void emitDoc(const Pass &pass, raw_ostream &os) {
   // Handle the statistics of the pass.
   ArrayRef<PassStatistic> stats = pass.getStatistics();
   if (!stats.empty()) {
-    os << "\n#### Statistics\n```\n";
+    os << "\n#### Statistics\n\n```\n";
     size_t longestStat = 0;
     for (const PassStatistic &stat : stats)
       longestStat = std::max(stat.getName().size(), longestStat);

>From f04be095f42290ec9c6d2bda8e71b6f152332818 Mon Sep 17 00:00:00 2001
From: Kevin Gleason <gleasonk at google.com>
Date: Fri, 13 Dec 2024 20:35:33 +0000
Subject: [PATCH 2/2] Add tests

---
 mlir/test/mlir-tblgen/gen-dialect-doc.td | 22 +++++++++++++++++++++-
 mlir/test/mlir-tblgen/gen-pass-doc.td    | 20 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 mlir/test/mlir-tblgen/gen-pass-doc.td

diff --git a/mlir/test/mlir-tblgen/gen-dialect-doc.td b/mlir/test/mlir-tblgen/gen-dialect-doc.td
index eb462fc6510288..06c90b702278eb 100644
--- a/mlir/test/mlir-tblgen/gen-dialect-doc.td
+++ b/mlir/test/mlir-tblgen/gen-dialect-doc.td
@@ -36,7 +36,15 @@ def ACOp : Op<Test_Dialect, "c", [NoMemoryEffect, SingleBlockImplicitTerminator<
 def ABOp : Op<Test_Dialect, "b", [NoMemoryEffect, SingleBlockImplicitTerminator<"YieldOp">]>;
 }
 
-def AEOp : Op<Test_Dialect, "e", [NoMemoryEffect, SingleBlockImplicitTerminator<"YieldOp">]>;
+def AEOp : Op<Test_Dialect, "e", [NoMemoryEffect]> {
+  let summary = "Op with a summary";
+  let description = "Op with a description";
+  let arguments = (ins ConfinedType<AnyType, [CPred<"::llvm::isa<::mlir::TensorType>($_self)">]>:$tensor,
+                       I16Attr:$int_attr);
+  let results = (outs
+    ConfinedType<AnyType, [CPred<"::llvm::isa<::mlir::TensorType>($_self)">]>:$output
+  ); 
+}
 
 def TestAttr : DialectAttr<Test_Dialect, CPred<"true">> {
   let summary = "attribute summary";
@@ -85,7 +93,19 @@ def TestEnum :
 // CHECK: [TOC]
 
 // CHECK-NOT: [TOC]
+
 // CHECK: test.e
+// CHECK: _Op with a summary_
+// CHECK: Op with a description
+// CHECK: Operands:
+// CHECK: | Operand | Description |
+// CHECK: | :-----: | ----------- |
+// CHECK: | `tensor` | |
+// CHECK: Results:
+// CHECK: | Result | Description |
+// CHECK: | :----: | ----------- |
+// CHECK: | `output` | |
+
 // CHECK: Group of ops
 // CHECK: test.a
 // CHECK: test.d
diff --git a/mlir/test/mlir-tblgen/gen-pass-doc.td b/mlir/test/mlir-tblgen/gen-pass-doc.td
new file mode 100644
index 00000000000000..6c3aeabc508457
--- /dev/null
+++ b/mlir/test/mlir-tblgen/gen-pass-doc.td
@@ -0,0 +1,20 @@
+// RUN: mlir-tblgen -gen-pass-doc -I %S/../../include -dialect=test %s | FileCheck %s
+
+include "mlir/Pass/PassBase.td"
+
+def TestPassDoc : Pass<"test-pass-doc"> {
+  let summary = "pass summary";
+  let description = [{
+    Pass description
+  }];
+
+  let options = [
+    ListOption<"option", "option", "std::string", "pass option">
+  ];
+}
+
+// CHECK: `-test-pass-doc`
+// CHECK: _Pass summary_
+// CHECK: Pass description
+// CHECK: Options
+// CHECK: -option : pass option



More information about the Mlir-commits mailing list