[llvm] [Github][CI] Add `doc8` for clang-tidy documentation formatting (PR #168827)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 19 21:53:35 PST 2025


https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/168827

>From 9552857532ca486931f0bcb247d1cd5df6a70c4d Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Thu, 20 Nov 2025 13:03:28 +0800
Subject: [PATCH 1/2] [Github][CI] Add `doc8` for clang-tidy documentation
 formatting

---
 .../github-action-ci-tooling/Dockerfile       |   4 +
 .github/workflows/pr-code-lint.yml            |  25 ++-
 llvm/utils/git/code-lint-helper.py            | 201 ++++++++++++++++--
 3 files changed, 203 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
index b78c99efb9be3..8d02baa05f489 100644
--- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile
+++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
@@ -94,6 +94,10 @@ COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/cla
 COPY clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py ${LLVM_SYSROOT}/bin/clang-tidy-diff.py
 
 # Install dependencies for 'pr-code-lint.yml' job
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y python3-doc8 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 COPY llvm/utils/git/requirements_linting.txt requirements_linting.txt
 RUN pip install -r requirements_linting.txt --break-system-packages && \
     rm requirements_linting.txt
diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml
index 5444a29c22205..60c1900000e5e 100644
--- a/.github/workflows/pr-code-lint.yml
+++ b/.github/workflows/pr-code-lint.yml
@@ -30,7 +30,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
           fetch-depth: 2
-      
+
       - name: Get changed files
         id: changed-files
         uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
@@ -39,14 +39,14 @@ jobs:
           skip_initial_fetch: true
           base_sha: 'HEAD~1'
           sha: 'HEAD'
-      
+
       - name: Listed files
         env:
           CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
         run: |
           echo "Changed files:"
           echo "$CHANGED_FILES"
-      
+
       # TODO: create special mapping for 'codegen' targets, for now build predefined set
       # TODO: add entrypoint in 'compute_projects.py' that only adds a project and its direct dependencies
       - name: Configure and CodeGen
@@ -71,25 +71,38 @@ jobs:
                 -DLLVM_INCLUDE_TESTS=OFF \
                 -DCLANG_INCLUDE_TESTS=OFF \
                 -DCMAKE_BUILD_TYPE=Release
-          
+
           ninja -C build \
                 clang-tablegen-targets \
                 genconfusable               # for "ConfusableIdentifierCheck.h"
 
-      - name: Run code linter
+      - name: Run clang-tidy linter
         env:
           GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
           CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
         run: |
           echo "[]" > comments &&
           python3 llvm/utils/git/code-lint-helper.py \
+            --linter clang-tidy \
             --token ${{ secrets.GITHUB_TOKEN }} \
             --issue-number $GITHUB_PR_NUMBER \
             --start-rev HEAD~1 \
             --end-rev HEAD \
             --verbose \
             --changed-files "$CHANGED_FILES"
-      
+
+      - name: Run doc8 linter
+        env:
+          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          python3 llvm/utils/git/code-lint-helper.py \
+            --linter doc8 \
+            --token ${{ secrets.GITHUB_TOKEN }} \
+            --issue-number $GITHUB_PR_NUMBER \
+            --start-rev HEAD~1 \
+            --end-rev HEAD \
+            --verbose
+
       - name: Upload results
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         if: always()
diff --git a/llvm/utils/git/code-lint-helper.py b/llvm/utils/git/code-lint-helper.py
index 1232f3ab0d370..fc2068b438209 100755
--- a/llvm/utils/git/code-lint-helper.py
+++ b/llvm/utils/git/code-lint-helper.py
@@ -34,6 +34,8 @@ class LintArgs:
     issue_number: int = 0
     build_path: str = "build"
     clang_tidy_binary: str = "clang-tidy"
+    doc8_binary: str = "doc8"
+    linter: str = None
 
     def __init__(self, args: argparse.Namespace = None) -> None:
         if not args is None:
@@ -46,9 +48,12 @@ def __init__(self, args: argparse.Namespace = None) -> None:
             self.verbose = args.verbose
             self.build_path = args.build_path
             self.clang_tidy_binary = args.clang_tidy_binary
+            self.doc8_binary = args.doc8_binary
+            self.linter = args.linter
 
 
-COMMENT_TAG = "<!--LLVM CODE LINT COMMENT: clang-tidy-->"
+COMMENT_TAG_CLANG_TIDY = "<!--LLVM CODE LINT COMMENT: clang-tidy-->"
+COMMENT_TAG_DOC8 = "<!--LLVM CODE LINT COMMENT: doc8-->"
 
 
 def get_instructions(cpp_files: List[str]) -> str:
@@ -135,13 +140,22 @@ def create_comment_text(warning: str, cpp_files: List[str]) -> str:
 """
 
 
-def find_comment(pr: any) -> any:
+def find_comment(pr: any, args: LintArgs) -> any:
+    comment_tag = get_comment_tag(args.linter)
     for comment in pr.as_issue().get_comments():
-        if COMMENT_TAG in comment.body:
+        if comment_tag in comment.body:
             return comment
     return None
 
 
+def get_comment_tag(linter: str) -> str:
+    if linter == "clang-tidy":
+        return COMMENT_TAG_CLANG_TIDY
+    elif linter == "doc8":
+        return COMMENT_TAG_DOC8
+    raise ValueError(f"Unknown linter: {linter}")
+
+
 def create_comment(
     comment_text: str, args: LintArgs, create_new: bool
 ) -> Optional[dict]:
@@ -150,9 +164,10 @@ def create_comment(
     repo = github.Github(args.token).get_repo(args.repo)
     pr = repo.get_issue(args.issue_number).as_pull_request()
 
-    comment_text = COMMENT_TAG + "\n\n" + comment_text
+    comment_tag = get_comment_tag(args.linter)
+    comment_text = comment_tag + "\n\n" + comment_text
 
-    existing_comment = find_comment(pr)
+    existing_comment = find_comment(pr, args)
 
     comment = None
     if create_new or existing_comment:
@@ -215,7 +230,126 @@ def run_clang_tidy(changed_files: List[str], args: LintArgs) -> Optional[str]:
     return clean_clang_tidy_output(proc.stdout.strip())
 
 
-def run_linter(changed_files: List[str], args: LintArgs) -> tuple[bool, Optional[dict]]:
+
+def clean_doc8_output(output: str) -> Optional[str]:
+    if not output:
+        return None
+
+    lines = output.split("\n")
+    cleaned_lines = []
+    in_summary = False
+
+    for line in lines:
+        if line.startswith("Scanning...") or line.startswith("Validating..."):
+            continue
+        if line.startswith("========"):
+            in_summary = True
+            continue
+        if in_summary:
+            continue
+        if line.strip():
+            cleaned_lines.append(line)
+
+    if cleaned_lines:
+        return "\n".join(cleaned_lines)
+    return None
+
+
+def get_doc8_instructions() -> str:
+    # TODO: use git diff
+    return "doc8 ./clang-tools-extra/docs/clang-tidy/checks/"
+
+
+def create_doc8_comment_text(doc8_output: str) -> str:
+    instructions = get_doc8_instructions()
+    return f"""
+:warning: Documentation linter doc8 found issues in your code. :warning:
+
+<details>
+<summary>
+You can test this locally with the following command:
+</summary>
+
+```bash
+{instructions}
+```
+
+</details>
+
+<details>
+<summary>
+View the output from doc8 here.
+</summary>
+
+```
+{doc8_output}
+```
+
+</details>
+"""
+
+
+def run_doc8(args: LintArgs) -> tuple[int, Optional[str]]:
+    doc8_cmd = [args.doc8_binary, "./clang-tools-extra/docs/clang-tidy/checks/"]
+
+    if args.verbose:
+        print(f"Running doc8: {' '.join(doc8_cmd)}")
+
+    proc = subprocess.run(
+        doc8_cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    cleaned_output = clean_doc8_output(proc.stdout.strip())
+    if proc.returncode != 0 and cleaned_output is None:
+        # Infrastructure failure
+        return proc.returncode, proc.stderr.strip()
+
+    return proc.returncode, cleaned_output
+
+
+def run_doc8_linter(args: LintArgs) -> tuple[bool, Optional[dict]]:
+    returncode, result = run_doc8(args)
+    should_update_gh = args.token is not None and args.repo is not None
+    comment = None
+
+    if returncode == 0:
+        if should_update_gh:
+            comment_text = (
+                ":white_check_mark: With the latest revision "
+                "this PR passed the documentation linter."
+            )
+            comment = create_comment(comment_text, args, create_new=False)
+        return True, comment
+    else:
+        if should_update_gh:
+            if result:
+                comment_text = create_doc8_comment_text(result)
+                comment = create_comment(comment_text, args, create_new=True)
+            else:
+                comment_text = (
+                    ":warning: The documentation linter failed without printing "
+                    "an output. Check the logs for output. :warning:"
+                )
+                comment = create_comment(comment_text, args, create_new=False)
+        else:
+            if result:
+                print(
+                    "Warning: Documentation linter, doc8 detected "
+                    "some issues with your code..."
+                )
+                print(result)
+            else:
+                print("Warning: Documentation linter, doc8 failed to run.")
+        return False, comment
+
+
+def run_clang_tidy_linter(
+    changed_files: List[str], args: LintArgs
+) -> tuple[bool, Optional[dict]]:
     changed_files = [arg for arg in changed_files if "third-party" not in arg]
 
     cpp_files = filter_changed_files(changed_files)
@@ -255,6 +389,13 @@ def run_linter(changed_files: List[str], args: LintArgs) -> tuple[bool, Optional
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--linter",
+        type=str,
+        choices=["clang-tidy", "doc8"],
+        required=True,
+        help="The linter to run.",
+    )
     parser.add_argument(
         "--token", type=str, required=True, help="GitHub authentication token"
     )
@@ -291,6 +432,12 @@ def run_linter(changed_files: List[str], args: LintArgs) -> tuple[bool, Optional
         default="clang-tidy",
         help="Path to clang-tidy binary",
     )
+    parser.add_argument(
+        "--doc8-binary",
+        type=str,
+        default="doc8",
+        help="Path to doc8 binary",
+    )
     parser.add_argument(
         "--verbose", action="store_true", default=True, help="Verbose output"
     )
@@ -298,32 +445,44 @@ def run_linter(changed_files: List[str], args: LintArgs) -> tuple[bool, Optional
     parsed_args = parser.parse_args()
     args = LintArgs(parsed_args)
 
-    changed_files = []
-    if args.changed_files:
-        changed_files = args.changed_files.split(",")
-
-    if args.verbose:
-        print(f"got changed files: {changed_files}")
-
     if args.verbose:
-        print("running linter clang-tidy")
+        print(f"running linter {args.linter}")
 
-    success, comment = run_linter(changed_files, args)
+    success, comment = False, None
+    if args.linter == "clang-tidy":
+        changed_files = []
+        if args.changed_files:
+            changed_files = args.changed_files.split(",")
+        if args.verbose:
+            print(f"got changed files: {changed_files}")
+        success, comment = run_clang_tidy_linter(changed_files, args)
+    elif args.linter == "doc8":
+        success, comment = run_doc8_linter(args)
 
     if not success:
         if args.verbose:
-            print("linter clang-tidy failed")
+            print(f"linter {args.linter} failed")
 
     # Write comments file if we have a comment
     if comment:
+        import json
         if args.verbose:
-            print(f"linter clang-tidy has comment: {comment}")
+            print(f"linter {args.linter} has comment: {comment}")
 
-        with open("comments", "w") as f:
-            import json
+        existing_comments = []
+        if os.path.exists("comments"):
+            with open("comments", "r") as f:
+                try:
+                    existing_comments = json.load(f)
+                except json.JSONDecodeError:
+                    # File might be empty or invalid, start fresh
+                    pass
 
-            json.dump([comment], f)
+        existing_comments.append(comment)
+
+        with open("comments", "w") as f:
+            json.dump(existing_comments, f)
 
     if not success:
-        print("error: some linters failed: clang-tidy")
+        print(f"error: linter {args.linter} failed")
         sys.exit(1)

>From 8fc630014edd5046c39d9da8831cad5a4f0d23b2 Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Thu, 20 Nov 2025 13:43:18 +0800
Subject: [PATCH 2/2] Try to make CI work

---
 .github/workflows/pr-code-lint.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml
index 60c1900000e5e..33b84117fea8b 100644
--- a/.github/workflows/pr-code-lint.yml
+++ b/.github/workflows/pr-code-lint.yml
@@ -76,6 +76,9 @@ jobs:
                 clang-tablegen-targets \
                 genconfusable               # for "ConfusableIdentifierCheck.h"
 
+      - name: Install linter dependencies
+        run: pip install doc8 --break-system-packages
+
       - name: Run clang-tidy linter
         env:
           GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}



More information about the llvm-commits mailing list