[clang] [NFC][analyzer] Document configuration options (PR #135169)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 14 09:40:37 PDT 2025


================
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+# A tool to automatically generate documentation for the config options of the
+# clang static analyzer by reading `AnalyzerOptions.def`.
+
+import argparse
+from collections import namedtuple
+from enum import Enum, auto
+import re
+import sys
+import textwrap
+
+
+# The following code implements a trivial parser for the narrow subset of C++
+# which is used in AnalyzerOptions.def. This supports the following features:
+# - ignores preprocessor directives, even if they are continued with \ at EOL
+# - ignores comments: both /* ... */ and // ...
+# - parses string literals (even if they contain \" escapes)
+# - concatenates adjacent string literals
+# - parses numbers even if they contain ' as a thousands separator
+# - recognizes MACRO(arg1, arg2, ..., argN) calls
+
+
+class TT(Enum):
+    "Token type enum."
+    number = auto()
+    ident = auto()
+    string = auto()
+    punct = auto()
+
+
+TOKENS = [
+    (re.compile(r"-?[0-9']+"), TT.number),
+    (re.compile(r"\w+"), TT.ident),
+    (re.compile(r'"([^\\"]|\\.)*"'), TT.string),
+    (re.compile(r"[(),]"), TT.punct),
+    (re.compile(r"/\*((?!\*/).)*\*/", re.S), None),  # C-style comment
+    (re.compile(r"//.*\n"), None),  # C++ style oneline comment
+    (re.compile(r"#.*(\\\n.*)*(?<!\\)\n"), None),  # preprocessor directive
+    (re.compile(r"\s+"), None),  # whitespace
+]
+
+Token = namedtuple("Token", "kind code")
+
+
+def report_unexpected(s, pos):
+    lines = (s[:pos] + "X").split("\n")
+    lineno, col = (len(lines), len(lines[-1]))
+    print(
+        "unexpected character %r in AnalyzerOptions.def at line %d column %d"
+        % (s[pos], lineno, col),
+        file=sys.stderr,
+    )
+
+
+def tokenize(s):
+    result = []
+    pos = 0
+    while pos < len(s):
+        for regex, kind in TOKENS:
+            if m := regex.match(s, pos):
+                if kind is not None:
+                    result.append(Token(kind, m.group(0)))
+                pos = m.end()
+                break
+        else:
+            report_unexpected(s, pos)
+            pos += 1
+    return result
----------------
steakhal wrote:

Do you intentionally tokenize by hand? I figured Python has dozens of libraries that do this for us.
Was the goal to avoid third-party dependencies?
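
For reference, even the standard library's `re` documentation has a "Writing a Tokenizer" recipe that joins the token patterns into one combined regex of named groups, so no third-party dependency is needed. A rough sketch (illustrative only, not code from this PR) reusing the token classes from the quoted hunk:

```python
import re

# Hypothetical sketch, not proposed code: the combined-pattern tokenizer recipe
# from the `re` module docs, reusing the token classes of the quoted hunk.
TOKEN_SPEC = [
    ("number", r"-?[0-9']+"),
    ("ident", r"\w+"),
    ("string", r'"(?:[^\\"]|\\.)*"'),
    ("punct", r"[(),]"),
    # comments, preprocessor directives and whitespace are matched but dropped
    ("skip", r"/\*(?s:(?!\*/).)*\*/|//.*\n|#.*(?:\\\n.*)*(?<!\\)\n|\s+"),
    ("mismatch", r"."),  # anything else: the real tool reports line/column here
]
TOKEN_RE = re.compile("|".join(f"(?P<{name}>{pat})" for name, pat in TOKEN_SPEC))


def tokenize(s):
    tokens = []
    for m in TOKEN_RE.finditer(s):
        if m.lastgroup in ("skip", "mismatch"):
            continue
        tokens.append((m.lastgroup, m.group()))
    return tokens
```

On input like `MACRO(name, "desc", 42)` this yields the same ident/punct/string/number sequence as the hand-rolled loop, with comments, directives and whitespace dropped. (Tokens come back as plain (name, text) pairs rather than the Token/TT types above, and the unexpected-character diagnostic is omitted, so this only shows the shape of the approach.)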

https://github.com/llvm/llvm-project/pull/135169

