[clang] [Clang][TableGen] Add documentation generation infrastructure for builtins (PR #181573)

Shilei Tian via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 15 13:01:13 PST 2026


https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/181573

Add a `-gen-builtin-docs` TableGen backend that generates RST
documentation from builtin definitions, modeled after the existing
attribute documentation system (`-gen-attr-docs`).

The emitter generates per-builtin RST sections grouped by category, including
prototype rendering with optional named parameters (via `ArgNames`), target
feature annotations, and documentation content. A mismatch between `ArgNames`
count and prototype parameter count is a fatal error.

>From 40a3ec721439a195b8e1677b13b89b068fb0ed7e Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 15 Feb 2026 15:43:55 -0500
Subject: [PATCH] [Clang][TableGen] Add documentation generation infrastructure
 for builtins

Add a `-gen-builtin-docs` TableGen backend that generates RST
documentation from builtin definitions, modeled after the existing
attribute documentation system (`-gen-attr-docs`).

The emitter generates per-builtin RST sections grouped by category, including
prototype rendering with optional named parameters (via `ArgNames`), target
feature annotations, and documentation content. A mismatch between `ArgNames`
count and prototype parameter count is a fatal error.
---
 clang/include/clang/Basic/BuiltinsBase.td     |  50 ++++
 clang/test/TableGen/builtin-docs.td           | 248 ++++++++++++++++++
 clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 182 +++++++++++++
 clang/utils/TableGen/TableGen.cpp             |   6 +
 clang/utils/TableGen/TableGenBackends.h       |   2 +
 5 files changed, 488 insertions(+)
 create mode 100644 clang/test/TableGen/builtin-docs.td

diff --git a/clang/include/clang/Basic/BuiltinsBase.td b/clang/include/clang/Basic/BuiltinsBase.td
index 7394f0efd8b0a..af9c0fd7d3cef 100644
--- a/clang/include/clang/Basic/BuiltinsBase.td
+++ b/clang/include/clang/Basic/BuiltinsBase.td
@@ -103,6 +103,50 @@ class NamePrefix<string spelling> {
   string Spelling = spelling;
 }
 
+// Documentation
+// =============
+
+class DocumentationCategory<string name> {
+  string Name = name;
+  code Content = [{}];
+}
+
+// This category is for builtins which have not yet been properly documented,
+// but should be.
+def DocCatUndocumented : DocumentationCategory<"Undocumented"> {
+  let Content = [{
+This section lists builtins which are recognized by Clang, but which are
+currently missing documentation.
+}];
+}
+
+// Builtins listed under the InternalOnly category do not generate any entry
+// in the documentation. This category should be used only when we _want_
+// to not document the builtin, e.g. if the builtin is for internal use only.
+def DocCatInternalOnly : DocumentationCategory<"InternalOnly">;
+
+// Documentation for a builtin: the category it is listed under and the text
+// (Content) written for it by `clang-tblgen -gen-builtin-docs`.
+class Documentation {
+  DocumentationCategory Category;
+  code Content = [{}];
+
+  // Section heading. When empty, the docs emitter falls back to the builtin's
+  // first spelling, or to the record name if the builtin has no spellings.
+  string Heading = "";
+}
+
+// Specifies that the builtin is explicitly omitted from the documentation,
+// because it is not intended to be user-facing.
+def InternalOnly : Documentation { let Category = DocCatInternalOnly; }
+
+// Specifies that the builtin is undocumented, but that it _should_ have
+// documentation.
+def Undocumented : Documentation {
+  let Category = DocCatUndocumented;
+  let Content = [{No documentation.}];
+}
+
 // Builtin kinds
 // =============
 
@@ -119,6 +163,12 @@ class Builtin {
   // Requires a common prefix to be prepended. Each generated set of builtins
   // can optionally extract one common prefix that is handled separately.
   NamePrefix RequiredNamePrefix;
+  // Documentation associated with this builtin.
+  list<Documentation> Documentation = [Undocumented];
+  // Optional argument names for documentation. When provided, these are
+  // interleaved with the parameter types from the Prototype field to produce
+  // a human-readable prototype in the generated documentation.
+  list<string> ArgNames = [];
 }
 
 class AtomicBuiltin : Builtin;
diff --git a/clang/test/TableGen/builtin-docs.td b/clang/test/TableGen/builtin-docs.td
new file mode 100644
index 0000000000000..730aa705f26ce
--- /dev/null
+++ b/clang/test/TableGen/builtin-docs.td
@@ -0,0 +1,248 @@
+// RUN: clang-tblgen -gen-builtin-docs -I%p/../../include %s -o - 2>&1 | \
+// RUN:    FileCheck %s
+
+// Test that mismatched ArgNames count produces an error.
+// RUN: not clang-tblgen -gen-builtin-docs -I%p/../../include %s -o - \
+// RUN:    -DERROR_ARGNAMES_MISMATCH 2>&1 | \
+// RUN:    FileCheck --check-prefix=ERROR-MISMATCH %s
+
+include "clang/Basic/BuiltinsBase.td"
+
+//===----------------------------------------------------------------------===//
+// Test categories
+//===----------------------------------------------------------------------===//
+
+def DocCatTestABI : DocumentationCategory<"ABI Builtins"> {
+  let Content = [{
+These builtins provide access to ABI-related information.
+}];
+}
+
+def DocCatTestWorkItem : DocumentationCategory<"Work-Item Builtins"> {
+  let Content = [{
+These builtins return work-item identification.
+}];
+}
+
+def DocCatTestInstruction : DocumentationCategory<"Instruction Builtins"> {
+  let Content = [{
+These builtins map to specific hardware instructions.
+}];
+}
+
+//===----------------------------------------------------------------------===//
+// Standalone documentation record (defined separately from the builtin)
+//===----------------------------------------------------------------------===//
+
+def TestDispatchPtrDoc : Documentation {
+  let Category = DocCatTestABI;
+  let Content = [{
+Returns a pointer to the dispatch packet.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalDocumentation (required by the emitter)
+//===----------------------------------------------------------------------===//
+
+def GlobalDocumentation {
+  code Intro = [{..
+  -------------------------------------------------------------------
+  NOTE: This file is automatically generated by running clang-tblgen
+  -gen-builtin-docs. Do not edit this file by hand!!
+  -------------------------------------------------------------------
+
+========================
+Test Builtin Reference
+========================
+.. contents::
+   :local:
+
+Introduction
+============
+
+This is a test for builtin documentation generation.
+}];
+}
+
+//===----------------------------------------------------------------------===//
+// Test builtin class and definitions
+//===----------------------------------------------------------------------===//
+
+class TestBuiltin<string prototype> : TargetBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+  let Features = "";
+}
+
+class TestBuiltinFeat<string prototype, string feat> : TargetBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+  let Features = feat;
+}
+
+// --- Using a standalone documentation record (defined above) ---
+
+let Documentation = [TestDispatchPtrDoc] in
+def __builtin_test_dispatch_ptr : TestBuiltin<"void *()">;
+
+// --- Using inline documentation records (defined next to the builtin) ---
+
+// No arguments, address-space qualified return type.
+def KernargPtrDocs : Documentation {
+  let Category = DocCatTestABI;
+  let Content = [{
+Returns a pointer to the kernarg segment in constant address space.
+  }];
+}
+def __builtin_test_kernarg_ptr : TestBuiltin<"void address_space<4> *()"> {
+  let Documentation = [KernargPtrDocs];
+}
+
+// No arguments, scalar return, with target feature.
+def WorkitemIdXDocs : Documentation {
+  let Category = DocCatTestWorkItem;
+  let Content = [{
+Returns the work-item id in the x dimension.
+  }];
+}
+def __builtin_test_workitem_id_x : TestBuiltinFeat<"unsigned int()", "some-feature"> {
+  let Documentation = [WorkitemIdXDocs];
+}
+
+// Multiple float arguments with ArgNames and target feature.
+def FmaDocs : Documentation {
+  let Category = DocCatTestInstruction;
+  let Content = [{
+Performs a fused multiply-add: returns ``x * y + z``.
+  }];
+}
+def __builtin_test_fma : TestBuiltinFeat<"float(float, float, float)", "fma-feature"> {
+  let ArgNames = ["x", "y", "z"];
+  let Documentation = [FmaDocs];
+}
+
+// Multiple unsigned int arguments with ArgNames.
+def BfeDocs : Documentation {
+  let Category = DocCatTestInstruction;
+  let Content = [{
+Performs a bitfield extract on a 32-bit integer.
+  }];
+}
+def __builtin_test_bfe : TestBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)"> {
+  let ArgNames = ["src", "offset", "width"];
+  let Documentation = [BfeDocs];
+}
+
+// Void return with a single argument and ArgNames.
+def FenceDocs : Documentation {
+  let Category = DocCatTestInstruction;
+  let Content = [{
+Inserts a scheduling fence with the specified ordering.
+  }];
+}
+def __builtin_test_fence : TestBuiltin<"void(unsigned int)"> {
+  let ArgNames = ["ordering"];
+  let Documentation = [FenceDocs];
+}
+
+// Mixed types with ArgNames and address_space in a parameter.
+def StoreDocs : Documentation {
+  let Category = DocCatTestInstruction;
+  let Content = [{
+Stores a value to the given global memory pointer.
+  }];
+}
+def __builtin_test_store : TestBuiltin<"void(int, int address_space<1> *)"> {
+  let ArgNames = ["val", "ptr"];
+  let Documentation = [StoreDocs];
+}
+
+// This builtin uses default Undocumented documentation (no ArgNames).
+def __builtin_test_undocumented : TestBuiltin<"void()">;
+
+// This builtin has no "let Documentation" at all -- the most common case for
+// existing builtins. It should still work and land in the Undocumented section.
+def __builtin_test_no_doc : TestBuiltin<"int(int, int)">;
+
+// CHECK: Test Builtin Reference
+// CHECK: This is a test for builtin documentation generation.
+
+// --- ABI Builtins (sorted first alphabetically) ---
+// CHECK:      ABI Builtins
+// CHECK-NEXT: ============
+// CHECK: These builtins provide access to ABI-related information.
+
+// void pointer return, no args (standalone doc record).
+// CHECK:      ``__builtin_test_dispatch_ptr``
+// CHECK:      void * __builtin_test_dispatch_ptr()
+// CHECK:      Returns a pointer to the dispatch packet.
+
+// Address-space qualified pointer return, no args (inline doc).
+// CHECK:      ``__builtin_test_kernarg_ptr``
+// CHECK:      void address_space<4> * __builtin_test_kernarg_ptr()
+// CHECK:      Returns a pointer to the kernarg segment in constant address space.
+
+// --- Instruction Builtins ---
+// CHECK:      Instruction Builtins
+// CHECK-NEXT: ====================
+// CHECK:      These builtins map to specific hardware instructions.
+
+// Multiple unsigned int arguments with ArgNames.
+// CHECK:      ``__builtin_test_bfe``
+// CHECK:      unsigned int __builtin_test_bfe(unsigned int src, unsigned int offset, unsigned int width)
+// CHECK:      Performs a bitfield extract on a 32-bit integer.
+
+// Void return with single ArgName.
+// CHECK:      ``__builtin_test_fence``
+// CHECK:      void __builtin_test_fence(unsigned int ordering)
+// CHECK:      Inserts a scheduling fence with the specified ordering.
+
+// Multiple float arguments with ArgNames and target feature.
+// CHECK:      ``__builtin_test_fma``
+// CHECK:      float __builtin_test_fma(float x, float y, float z)
+// CHECK:      **Target Features:** fma-feature
+// CHECK:      Performs a fused multiply-add: returns ``x * y + z``.
+
+// Mixed types with ArgNames including address_space in a parameter.
+// CHECK:      ``__builtin_test_store``
+// CHECK:      void __builtin_test_store(int val, int address_space<1> * ptr)
+// CHECK:      Stores a value to the given global memory pointer.
+
+// --- Undocumented (alphabetically between Instruction and Work-Item) ---
+// CHECK:      Undocumented
+// CHECK-NEXT: ============
+
+// Builtin with no "let Documentation" at all -- uses the class default.
+// CHECK:      ``__builtin_test_no_doc``
+// CHECK:      int __builtin_test_no_doc(int, int)
+// CHECK:      No documentation.
+
+// CHECK:      ``__builtin_test_undocumented``
+// CHECK:      void __builtin_test_undocumented()
+// CHECK:      No documentation.
+
+// --- Work-Item Builtins (inline doc, no ArgNames) ---
+// CHECK:      Work-Item Builtins
+// CHECK-NEXT: ==================
+// CHECK:      These builtins return work-item identification.
+// CHECK:      ``__builtin_test_workitem_id_x``
+// CHECK:      unsigned int __builtin_test_workitem_id_x()
+// CHECK:      **Target Features:** some-feature
+// CHECK:      Returns the work-item id in the x dimension.
+
+//===----------------------------------------------------------------------===//
+// Error test: ArgNames count mismatch
+//===----------------------------------------------------------------------===//
+
+#ifdef ERROR_ARGNAMES_MISMATCH
+def MismatchDocs : Documentation {
+  let Category = DocCatTestInstruction;
+  let Content = [{This should error.}];
+}
+// ERROR-MISMATCH: error: number of ArgNames (2) does not match number of prototype parameters (3)
+def __builtin_test_mismatch : TestBuiltin<"void(int, int, int)"> {
+  let ArgNames = ["a", "b"];
+  let Documentation = [MismatchDocs];
+}
+#endif
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index d61226a5c5f5c..efa576d09ebe4 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -11,6 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "TableGenBackends.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/TableGen/Error.h"
@@ -605,3 +607,183 @@ void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
 #undef ATOMIC_BUILTIN
 )c++";
 }
+
+//===----------------------------------------------------------------------===//
+// Builtin documentation emitter
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Holds the data needed to emit documentation for a single builtin.
+struct BuiltinDocData {
+  const Record *Documentation = nullptr; ///< Documentation def being rendered.
+  const Record *BuiltinRecord = nullptr; ///< Builtin def the entry describes.
+  std::string Heading;                   ///< Section title for the entry.
+};
+} // namespace
+
+/// Emit an RST section heading ('=' underline) for \p Category, followed by
+/// the category's Content text when it is not blank.
+static void writeCategoryHeader(const Record *Category, raw_ostream &OS) {
+  StringRef Title = Category->getValueAsString("Name");
+  // One '=' per title character; RST needs an underline at least that long.
+  OS << "\n" << Title << "\n" << std::string(Title.size(), '=') << "\n\n";
+
+  StringRef Body = Category->getValueAsString("Content").trim();
+  if (!Body.empty())
+    OS << Body << "\n\n";
+}
+
+/// Break the comma-separated list \p Params into trimmed parameter types.
+/// Commas nested in angle brackets (e.g. address_space<4>, _ExtVector<4,
+/// float>) belong to a type and do not separate parameters.
+static SmallVector<StringRef> splitParams(StringRef Params) {
+  SmallVector<StringRef> Pieces;
+  if (Params.empty())
+    return Pieces;
+
+  int AngleDepth = 0;
+  size_t PieceBegin = 0;
+  for (size_t Pos = 0, End = Params.size(); Pos != End; ++Pos) {
+    const char C = Params[Pos];
+    if (C == '<' || C == '>') {
+      AngleDepth += (C == '<') ? 1 : -1;
+      continue;
+    }
+    if (C != ',' || AngleDepth != 0)
+      continue;
+    Pieces.push_back(Params.slice(PieceBegin, Pos).trim());
+    PieceBegin = Pos + 1;
+  }
+  // Whatever follows the final separator is the last parameter, if any.
+  if (StringRef Tail = Params.substr(PieceBegin).trim(); !Tail.empty())
+    Pieces.push_back(Tail);
+  return Pieces;
+}
+
+/// Emit the RST subsection for one builtin: '-' underlined heading, prototype
+/// code block (types paired with ArgNames when given; a count mismatch is a
+/// fatal error), target features of a TargetBuiltin, and the doc Content.
+static void writeBuiltinDocumentation(const BuiltinDocData &Doc,
+                                      raw_ostream &OS) {
+  // Write heading with '-' underline (subsection).
+  std::string HeadingText = "``" + Doc.Heading + "``";
+  OS << HeadingText << "\n" << std::string(HeadingText.size(), '-') << "\n\n";
+
+  // Write prototype as a code block.
+  StringRef Prototype = Doc.BuiltinRecord->getValueAsString("Prototype");
+  if (!Prototype.empty()) {
+    std::vector<StringRef> Spellings =
+        Doc.BuiltinRecord->getValueAsListOfStrings("Spellings");
+    StringRef Name =
+        Spellings.empty() ? Doc.BuiltinRecord->getName() : Spellings[0];
+
+    // Split prototype into return type and params at the first '('.
+    size_t ParenPos = Prototype.find('(');
+    if (ParenPos != StringRef::npos) {
+      StringRef RetType = Prototype.substr(0, ParenPos).rtrim();
+      // End the parameter list at the last ')' instead of assuming it is the
+      // final character; trailing whitespace must not leak a ')' into it.
+      StringRef ParamStr = Prototype.substr(ParenPos + 1);
+      ParamStr = ParamStr.substr(0, ParamStr.rfind(')')).trim();
+      OS << "**Prototype:**\n\n.. code-block:: c\n\n"
+         << "  " << RetType << " " << Name << "(";
+
+      std::vector<StringRef> ArgNames =
+          Doc.BuiltinRecord->getValueAsListOfStrings("ArgNames");
+      if (!ArgNames.empty()) {
+        SmallVector<StringRef> ParamTypes = splitParams(ParamStr);
+        if (ArgNames.size() != ParamTypes.size())
+          PrintFatalError(Doc.BuiltinRecord->getLoc(),
+                          "number of ArgNames (" + Twine(ArgNames.size()) +
+                              ") does not match number of prototype "
+                              "parameters (" +
+                              Twine(ParamTypes.size()) + ")");
+        for (size_t I = 0, E = ParamTypes.size(); I < E; ++I) {
+          if (I > 0)
+            OS << ", ";
+          OS << ParamTypes[I] << " " << ArgNames[I];
+        }
+      } else {
+        OS << ParamStr;
+      }
+
+      OS << ")\n\n";
+    }
+  }
+
+  // Write target features if this is a TargetBuiltin with features.
+  if (Doc.BuiltinRecord->isSubClassOf("TargetBuiltin")) {
+    StringRef Features = Doc.BuiltinRecord->getValueAsString("Features");
+    if (!Features.empty())
+      OS << "**Target Features:** " << Features << "\n\n";
+  }
+
+  // Write documentation content.
+  StringRef Content = Doc.Documentation->getValueAsString("Content");
+  OS << Content.trim() << "\n\n\n";
+}
+
+/// Emit the builtin documentation as RST: the GlobalDocumentation intro, then
+/// one section per documentation category (sorted by name), each listing its
+/// builtins sorted by heading. InternalOnly entries are omitted.
+void clang::EmitClangBuiltinDocs(const RecordKeeper &Records, raw_ostream &OS) {
+  // Get the documentation introduction paragraph.
+  const Record *Doc = Records.getDef("GlobalDocumentation");
+  if (!Doc)
+    PrintFatalError("The GlobalDocumentation top-level definition is missing, "
+                    "no documentation will be generated.");
+
+  OS << Doc->getValueAsString("Intro") << "\n";
+
+  // Gather documentation from each builtin, grouped by category.
+  llvm::MapVector<const Record *, std::vector<BuiltinDocData>> SplitDocs;
+
+  for (const Record *B : Records.getAllDerivedDefinitions("Builtin")) {
+    for (const Record *D : B->getValueAsListOfDefs("Documentation")) {
+      const Record *Category = D->getValueAsDef("Category");
+      StringRef Cat = Category->getValueAsString("Name");
+
+      // Skip builtins that are explicitly internal-only.
+      if (Cat == "InternalOnly")
+        continue;
+
+      BuiltinDocData Data;
+      Data.Documentation = D;
+      Data.BuiltinRecord = B;
+
+      // Use the Heading field if set, otherwise the builtin's first spelling,
+      // falling back to the record name when there are no spellings.
+      StringRef Heading = D->getValueAsString("Heading");
+      if (Heading.empty()) {
+        std::vector<StringRef> Spellings =
+            B->getValueAsListOfStrings("Spellings");
+        Heading = Spellings.empty() ? B->getName() : Spellings[0];
+      }
+      Data.Heading = Heading.str();
+
+      SplitDocs[Category].push_back(std::move(Data));
+    }
+  }
+
+  // Sort categories by name. Sort the extracted vector rather than the
+  // MapVector in place (which would leave its key index stale), and sort
+  // stably so that equal names keep their gathered order.
+  auto Categories = SplitDocs.takeVector();
+  llvm::stable_sort(Categories, [](const auto &A, const auto &B) {
+    return A.first->getValueAsString("Name") <
+           B.first->getValueAsString("Name");
+  });
+
+  // Write out each category and its builtins.
+  for (auto &[Category, Docs] : Categories) {
+    writeCategoryHeader(Category, OS);
+
+    // Sort entries by heading; stable so duplicate headings stay ordered.
+    llvm::stable_sort(Docs, [](const auto &A, const auto &B) {
+      return A.Heading < B.Heading;
+    });
+
+    for (const BuiltinDocData &D : Docs)
+      writeBuiltinDocumentation(D, OS);
+  }
+}
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 707ce617cb2d0..cc2e7fef79659 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -118,6 +118,7 @@ enum ActionType {
   GenRISCVAndesVectorBuiltinCG,
   GenRISCVAndesVectorBuiltinSema,
   GenAttrDocs,
+  GenBuiltinDocs,
   GenDiagDocs,
   GenOptDocs,
   GenDataCollectors,
@@ -344,6 +345,8 @@ cl::opt<ActionType> Action(
                    "Generate riscv_andes_vector_builtin_sema.inc for clang"),
         clEnumValN(GenAttrDocs, "gen-attr-docs",
                    "Generate attribute documentation"),
+        clEnumValN(GenBuiltinDocs, "gen-builtin-docs",
+                   "Generate builtin documentation"),
         clEnumValN(GenDiagDocs, "gen-diag-docs",
                    "Generate diagnostic documentation"),
         clEnumValN(GenOptDocs, "gen-opt-docs", "Generate option documentation"),
@@ -648,6 +651,9 @@ bool ClangTableGenMain(raw_ostream &OS, const RecordKeeper &Records) {
   case GenAttrDocs:
     EmitClangAttrDocs(Records, OS);
     break;
+  case GenBuiltinDocs:
+    EmitClangBuiltinDocs(Records, OS);
+    break;
   case GenDiagDocs:
     EmitClangDiagDocs(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 058bda3ebd246..cea1af0532db7 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -93,6 +93,8 @@ void EmitClangAttrDocTable(const llvm::RecordKeeper &Records,
 
 void EmitClangBuiltins(const llvm::RecordKeeper &Records,
                        llvm::raw_ostream &OS);
+void EmitClangBuiltinDocs(const llvm::RecordKeeper &Records,
+                          llvm::raw_ostream &OS);
 void EmitClangBuiltinTemplates(const llvm::RecordKeeper &Records,
                                llvm::raw_ostream &OS);
 



More information about the cfe-commits mailing list