[Lldb-commits] [lldb] [lldb][docs] Generate the Python API enums page from headers (PR #202780)

Wed Jun 10 03:47:25 PDT 2026

================
@@ -0,0 +1,309 @@
+"""Generate the "Python API enumerators and constants" documentation page.
+
+LLDB exposes the enumerators from `lldb-enumerations.h` and the constants from
+`lldb-defines.h` as attributes of the `lldb` Python module. This script parses
+those two headers and emits a Markdown page documenting every public value, so
+the page can no longer drift out of sync with the source the way a
+hand-maintained copy does.
+
+The page is generated at build time and pulled into `python_api_enums.md` via
+the `{build-include}` directive (see `lldb/docs/_ext/build_include.py`).
+"""
+
+import argparse
+import re
+from dataclasses import dataclass, field
+
+# Matches the start of an enum declaration up to and including the opening
+# brace, capturing the enum name. Covers plain `enum Name {`, fixed-type
+# `enum Name : type {`, and the `FLAGS_ENUM(Name){` / `FLAGS_ANONYMOUS_ENUM()`
+# macros from lldb-enumerations.h. Enum bodies never contain nested braces, so
+# the matching `}` is simply the next one in the text.
+ENUM_RE = re.compile(
+    r"(?:enum\s+(?P<name>\w+)\s*(?::\s*[\w:]+\s*)?"
+    r"|FLAGS_ENUM\(\s*(?P<flags_name>\w+)\s*\)"
+    r"|FLAGS_ANONYMOUS_ENUM\(\s*\))\s*\{"
+)
+
+# Doxygen inline commands that wrap a following word for emphasis or reference.
+# We drop the command itself and keep its argument.
+DOXYGEN_CMD_RE = re.compile(r"\\(?:a|b|c|e|p|ref|see|link|endlink)\b\s?")
+
+# Constants are grouped editorially to match the long-standing layout of the
+# page. The classifier is prefix-based so new constants land in a sensible
+# group without further maintenance; anything unrecognized falls into
+# "Miscellaneous constants".
+CONSTANT_GROUP_ORDER = [
+    "Generic register numbers",
+    "Invalid value definitions",
+    "CPU types",
+    "Option set definitions",
+    "Miscellaneous constants",
+]
+
+
+def slugify(text):
+    return re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
+
+
+def clean_comment(text):
+    """Strip a doc-comment fragment down to its prose."""
+    return DOXYGEN_CMD_RE.sub("", text).rstrip()
+
+
+@dataclass
+class Member:
+    name: str
+    desc: list = field(default_factory=list)  # lines; "" marks a paragraph break
+
+
+def parse_enum_body(body):
+    """Parse the body of an enum into a list of documented members.
+
+    Comment association follows Doxygen conventions, with one accommodation for
+    the header's occasional misuse of `///<` on its own line as a *leading*
+    comment (see WatchpointValueKind): a `///<` on the same line as a member
+    documents that member, while a standalone doc comment that isn't continuing
+    a trailing comment is treated as a leading comment for the next member.
+    """
+    members = []
+    pending_lead = []  # leading doc lines awaiting the next member
+    current = None  # most recently named member (target of trailing comments)
+    in_trailing = False  # currently extending a member's trailing comment
+    awaiting_name = True  # next identifier starts a new member
+    depth = 0  # parenthesis nesting, to find top-level commas
+
+    def attach_lead(member):
+        # Drop a leading line that merely repeats the member name (the style
+        # used by CommandFlags) along with its trailing blank.
+        lead = pending_lead[:]
+        while lead and lead[0] == "":
+            lead.pop(0)
+        if lead and lead[0] == member.name:
+            lead.pop(0)
+            while lead and lead[0] == "":
+                lead.pop(0)
+        member.desc.extend(lead)
+
+    for line in body.splitlines():
+        comment_start = line.find("//")
+        if comment_start == -1:
+            code, comment = line, None
+        else:
+            code, comment = line[:comment_start], line[comment_start:]
+
+        # Walk the code, picking out member names and top-level commas.
+        i = 0
+        while i < len(code):
+            ch = code[i]
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                depth -= 1
+            elif ch == "," and depth == 0:
+                awaiting_name = True
+            elif awaiting_name and (ch.isalpha() or ch == "_"):
+                j = i
+                while j < len(code) and (code[j].isalnum() or code[j] == "_"):
+                    j += 1
+                name = code[i:j]
+                current = Member(name)
+                attach_lead(current)
+                pending_lead = []
+                in_trailing = False
+                awaiting_name = False
+                # Only public enumerators (the `e` prefix) are documented;
+                # `k`-prefixed sentinels like kNumFormats are internal.
+                if name.startswith("e"):
+                    members.append(current)
+                i = j
+                continue
+            i += 1
+
+        if comment is not None:
+            has_code = bool(code.strip())
+            if comment.startswith("///<"):
+                text = clean_comment(comment[4:].lstrip())
+                if has_code and current is not None:
+                    current.desc.append(text)
+                    in_trailing = True
+                elif in_trailing and current is not None:
+                    current.desc.append(text)
+                else:
+                    pending_lead.append(text)
+            elif comment.startswith("///"):
+                text = clean_comment(comment[3:].lstrip())
+                if has_code and current is not None:
+                    current.desc.append(text)
+                    in_trailing = True
+                elif in_trailing and current is not None:
+                    current.desc.append(text)
+                else:
+                    pending_lead.append(text)
+            # A plain `//` comment is an internal note; ignore it.
+        elif not code.strip():
+            # Blank line: ends any trailing-comment continuation and separates
+            # paragraphs in an accumulating leading comment.
+            in_trailing = False
+            if pending_lead and pending_lead[-1] != "":
+                pending_lead.append("")
+
+    return members
+
+
+def parse_enums(text):
+    """Yield (name, description_lines, members) for each enum in the header."""
+    for match in ENUM_RE.finditer(text):
+        name = match.group("name") or match.group("flags_name")
+        if name is None:
+            continue  # anonymous flag enums have no name to document
+        close = text.index("}", match.end())
+        members = parse_enum_body(text[match.end() : close])
+        if not members:
+            continue
+        yield name, leading_description(text[: match.start()]), members
+
+
+def leading_description(preceding_text):
+    """Collect the `///` doc comment immediately above a declaration."""
+    lines = []
+    for line in reversed(preceding_text.splitlines()):
+        if not line.strip().startswith("//"):
+            break
+        lines.append(line)
+    lines.reverse()
+
+    desc = []
+    for line in lines:
+        stripped = line.strip()
+        if stripped.startswith("///"):
+            desc.append(clean_comment(stripped[3:].lstrip()))
----------------
Teemperor wrote:

Just to teach Claude a new old trick: since Python 3.9 we can use `str.removeprefix` here instead of slicing the prefix off by length:

```python
>>> "///comment".removeprefix("///")
'comment'
>>> "/*/comment".removeprefix("///")
'/*/comment'
>>> 
```

https://github.com/llvm/llvm-project/pull/202780