[Lldb-commits] [lldb] [lldb][docs] Generate the Python API enums page from headers (PR #202780)
Raphael Isemann via lldb-commits
lldb-commits at lists.llvm.org
Wed Jun 10 03:47:25 PDT 2026
================
@@ -0,0 +1,309 @@
+"""Generate the "Python API enumerators and constants" documentation page.
+
+LLDB exposes the enumerators from `lldb-enumerations.h` and the constants from
+`lldb-defines.h` as attributes of the `lldb` Python module. This script parses
+those two headers and emits a Markdown page documenting every public value, so
+the page can no longer drift out of sync with the source the way a
+hand-maintained copy does.
+
+The page is generated at build time and pulled into `python_api_enums.md` via
+the `{build-include}` directive (see `lldb/docs/_ext/build_include.py`).
+"""
+
+import argparse
+import re
+from dataclasses import dataclass, field
+
+# Matches the start of an enum declaration up to and including the opening
+# brace, capturing the enum name. Covers plain `enum Name {`, the
+# fixed-underlying-type form `enum Name : type {`, and the `FLAGS_ENUM(Name){`
+# / `FLAGS_ANONYMOUS_ENUM()` macros from lldb-enumerations.h. Enum bodies never
+# contain nested braces, so the matching `}` is simply the next one in the
+# text.
+#
+# The name is captured in group "name" for the keyword form and in group
+# "flags_name" for FLAGS_ENUM; FLAGS_ANONYMOUS_ENUM sets neither group. The
+# word-boundary assertion in front of `enum` keeps the keyword from matching as
+# the tail of a longer identifier such as `my_enum`.
+ENUM_RE = re.compile(
+    r"(?:\benum\s+(?P<name>\w+)\s*(?::\s*[\w:]+\s*)?"
+    r"|FLAGS_ENUM\(\s*(?P<flags_name>\w+)\s*\)"
+    r"|FLAGS_ANONYMOUS_ENUM\(\s*\))\s*\{"
+)
+
+# Doxygen inline commands that wrap a following word for emphasis or reference.
+# We drop the command itself and keep its argument. The word-boundary assertion
+# after the command name stops a short command from matching the start of a
+# longer one (e.g. `\b` inside `\brief`), and at most one whitespace character
+# following the command is removed together with it.
+DOXYGEN_CMD_RE = re.compile(r"\\(?:a|b|c|e|p|ref|see|link|endlink)\b\s?")
+
+# Constants are grouped editorially to match the long-standing layout of the
+# page. The classifier is prefix-based so new constants land in a sensible
+# group without further maintenance; anything unrecognized falls into
+# "Miscellaneous constants".
+# NOTE(review): the classifier and the code that consumes this list are not
+# part of this excerpt; presumably the groups are emitted in list order --
+# confirm against the rest of the script.
+CONSTANT_GROUP_ORDER = [
+ "Generic register numbers",
+ "Invalid value definitions",
+ "CPU types",
+ "Option set definitions",
+ "Miscellaneous constants",
+]
+
+
+def slugify(text):
+    """Return `text` lowercased, with non-alphanumeric runs turned into hyphens.
+
+    Every run of characters outside `a-z` and `0-9` (after lowercasing) becomes
+    a single `-`, and hyphens at either end of the result are removed.
+    """
+    lowered = text.lower()
+    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
+    return hyphenated.strip("-")
+
+
+def clean_comment(text):
+    """Reduce a doc-comment fragment to its prose.
+
+    Every match of `DOXYGEN_CMD_RE` is removed and trailing whitespace is
+    stripped from the result; leading whitespace is left untouched.
+    """
+    without_commands = DOXYGEN_CMD_RE.sub("", text)
+    return without_commands.rstrip()
+
+
+@dataclass
+class Member:
+    """One enumerator parsed out of an enum body.
+
+    `name` is the enumerator's identifier. `desc` holds its documentation, one
+    entry per line, where an empty string marks a paragraph break.
+    """
+
+    name: str
+    desc: list = field(default_factory=list)  # lines; "" marks a paragraph break
+
+
+def parse_enum_body(body):
+    """Parse the body of an enum into a list of documented members.
+
+    Comment association follows Doxygen conventions, with one accommodation for
+    the header's occasional misuse of `///<` on its own line as a *leading*
+    comment (see WatchpointValueKind): a trailing `///<` documents the member on
+    its own line, while a standalone doc comment that isn't continuing a
+    trailing comment is treated as a leading comment for the next member.
+
+    `///` and `///<` comments are handled identically apart from the marker
+    that is stripped; plain `//` comments are ignored.
+
+    Args:
+        body: The text between an enum's braces.
+
+    Returns:
+        The `Member` objects, in declaration order, whose names start with
+        `e`. Other enumerators are still parsed, so they consume the comments
+        that belong to them, but they are left out of the result.
+    """
+    members = []
+    pending_lead = []  # leading doc lines awaiting the next member
+    current = None  # most recently named member (target of trailing comments)
+    in_trailing = False  # currently extending a member's trailing comment
+    awaiting_name = True  # next identifier starts a new member
+    depth = 0  # parenthesis nesting, to find top-level commas
+
+    def attach_lead(member):
+        # Drop a leading line that merely repeats the member name (the style
+        # used by CommandFlags) along with its trailing blank.
+        lead = pending_lead[:]
+        while lead and lead[0] == "":
+            lead.pop(0)
+        if lead and lead[0] == member.name:
+            lead.pop(0)
+            while lead and lead[0] == "":
+                lead.pop(0)
+        member.desc.extend(lead)
+
+    for line in body.splitlines():
+        # Everything from the first `//` onwards is comment; the rest is code.
+        code, slashes, rest = line.partition("//")
+        comment = slashes + rest if slashes else None
+
+        # Walk the code, picking out member names and top-level commas.
+        i = 0
+        while i < len(code):
+            ch = code[i]
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                depth -= 1
+            elif ch == "," and depth == 0:
+                awaiting_name = True
+            elif awaiting_name and (ch.isalpha() or ch == "_"):
+                j = i
+                while j < len(code) and (code[j].isalnum() or code[j] == "_"):
+                    j += 1
+                name = code[i:j]
+                current = Member(name)
+                attach_lead(current)
+                pending_lead = []
+                in_trailing = False
+                awaiting_name = False
+                # Only public enumerators (the `e` prefix) are documented;
+                # `k`-prefixed sentinels like kNumFormats are internal.
+                if name.startswith("e"):
+                    members.append(current)
+                i = j
+                continue
+            i += 1
+
+        if comment is not None:
+            if comment.startswith("///"):
+                # `///<` and `///` differ only in how much marker to strip.
+                marker_len = 4 if comment.startswith("///<") else 3
+                text = clean_comment(comment[marker_len:].lstrip())
+                has_code = bool(code.strip())
+                if current is not None and (has_code or in_trailing):
+                    # Either trails a member on this line or continues the
+                    # trailing comment started on an earlier line.
+                    current.desc.append(text)
+                    in_trailing = True
+                else:
+                    pending_lead.append(text)
+            # A plain `//` comment is an internal note; ignore it.
+        elif not code.strip():
+            # Blank line: ends any trailing-comment continuation and separates
+            # paragraphs in an accumulating leading comment.
+            in_trailing = False
+            if pending_lead and pending_lead[-1] != "":
+                pending_lead.append("")
+
+    return members
+
+
+def parse_enums(text):
+    """Yield (name, description_lines, members) for each enum in the header.
+
+    Anonymous flag enums and enums for which `parse_enum_body` returns no
+    members are skipped. The body of an enum runs from the end of the
+    `ENUM_RE` match to the next `}` in `text`.
+    """
+    for decl in ENUM_RE.finditer(text):
+        enum_name = decl.group("name") or decl.group("flags_name")
+        if enum_name is None:
+            # Anonymous flag enums have no name to document.
+            continue
+        body_start = decl.end()
+        body_end = text.index("}", body_start)
+        documented = parse_enum_body(text[body_start:body_end])
+        if documented:
+            yield enum_name, leading_description(text[: decl.start()]), documented
+
+
+def leading_description(preceding_text):
+ """Collect the `///` doc comment immediately above a declaration."""
+ lines = []
+ for line in reversed(preceding_text.splitlines()):
+ if not line.strip().startswith("//"):
+ break
+ lines.append(line)
+ lines.reverse()
+
+ desc = []
+ for line in lines:
+ stripped = line.strip()
+ if stripped.startswith("///"):
+ desc.append(clean_comment(stripped[3:].lstrip()))
----------------
Teemperor wrote:
Just to teach Claude some new old tricks: since Python 3.9 we can just use `str.removeprefix`, which strips the prefix only when the string actually starts with it:
```python
>>> "///comment".removeprefix("///")
'comment'
>>> "/*/comment".removeprefix("///")
'/*/comment'
>>>
```
https://github.com/llvm/llvm-project/pull/202780
More information about the lldb-commits
mailing list