[clang-tools-extra] [clang] [llvm] code-format: Improve the code-format-helper to be able to run as a git hook (PR #73957)

Tobias Hieta via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 11 00:25:44 PST 2023


https://github.com/tru updated https://github.com/llvm/llvm-project/pull/73957

>From 2791b93517fbffec8757ab994246a98b4fd9d727 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias at hieta.se>
Date: Mon, 2 Oct 2023 09:42:33 +0200
Subject: [PATCH 1/4] [workflow] Fix abi checker in llvm-tests. Same fix as in
 99fb0af80d16b0ff886f032441392219e1cac452

---
 .github/workflows/llvm-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 428235b72fa5ad..cc9855ce182b2b 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -170,14 +170,17 @@ jobs:
         uses: actions/download-artifact at v3
         with:
           name: build-baseline
+          path: build-baseline
       - name: Download latest
         uses: actions/download-artifact at v3
         with:
           name: build-latest
+          path: build-latest
       - name: Download symbol list
         uses: actions/download-artifact at v3
         with:
           name: symbol-list
+          path: symbol-list
 
       - name: Install abi-compliance-checker
         run: sudo apt-get install abi-compliance-checker

>From 1e5407c86845c580864d0b4d998622b5fc04f3cc Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias at hieta.se>
Date: Thu, 30 Nov 2023 16:50:40 +0100
Subject: [PATCH 2/4] code-format: Improve the code-format-helper to be able to
 run as a git hook

As part of #73798 there was some discussion about using the format
helper to run from a git-hook. That was not possible for a number of
reasons, but with these changes it can now be installed as a hook and
then run on the local cache in git instead of a diff between revisions.

This also checks for two environment variables DARKER_FORMAT_PATH and
CLANG_FORMAT_PATH where you can specify the path to the program you want
to use.
---
 llvm/utils/git/code-format-helper.py | 179 ++++++++++++++++++++-------
 1 file changed, 134 insertions(+), 45 deletions(-)
 mode change 100644 => 100755 llvm/utils/git/code-format-helper.py

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
old mode 100644
new mode 100755
index f89f060b3fe5e8..409b8cc790efd4
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -14,8 +14,22 @@
 import sys
 from functools import cached_property
 
-import github
-from github import IssueComment, PullRequest
+
+class FormatArgs:
+    start_rev: str = None
+    end_rev: str = None
+    repo: str = None
+    changed_files: list[str] = []
+    token: str = None
+    verbose: bool = True
+
+    def __init__(self, args: argparse.Namespace = None) -> None:
+        if not args is None:
+            self.start_rev = args.start_rev
+            self.end_rev = args.end_rev
+            self.repo = args.repo
+            self.token = args.token
+            self.changed_files = args.changed_files
 
 
 class FormatHelper:
@@ -31,9 +45,10 @@ def comment_tag(self) -> str:
     def instructions(self) -> str:
         raise NotImplementedError()
 
-    def format_run(
-        self, changed_files: list[str], args: argparse.Namespace
-    ) -> str | None:
+    def has_tool(self) -> bool:
+        raise NotImplementedError()
+
+    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
         raise NotImplementedError()
 
     def pr_comment_text_for_diff(self, diff: str) -> str:
@@ -63,17 +78,16 @@ def pr_comment_text_for_diff(self, diff: str) -> str:
 </details>
 """
 
-    def find_comment(
-        self, pr: PullRequest.PullRequest
-    ) -> IssueComment.IssueComment | None:
+    def find_comment(self, pr: any) -> any:
         for comment in pr.as_issue().get_comments():
             if self.comment_tag in comment.body:
                 return comment
         return None
 
-    def update_pr(
-        self, comment_text: str, args: argparse.Namespace, create_new: bool
-    ) -> None:
+    def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> None:
+        import github
+        from github import IssueComment, PullRequest
+
         repo = github.Github(args.token).get_repo(args.repo)
         pr = repo.get_issue(args.issue_number).as_pull_request()
 
@@ -85,17 +99,25 @@ def update_pr(
         elif create_new:
             pr.as_issue().create_comment(comment_text)
 
-    def run(self, changed_files: list[str], args: argparse.Namespace) -> bool:
+    def run(self, changed_files: list[str], args: FormatArgs) -> bool:
         diff = self.format_run(changed_files, args)
+        should_update_gh = args.token is not None and args.repo is not None
+
         if diff is None:
-            comment_text = f"""
-:white_check_mark: With the latest revision this PR passed the {self.friendly_name}.
-"""
-            self.update_pr(comment_text, args, create_new=False)
+            if should_update_gh:
+                comment_text = f"""
+    :white_check_mark: With the latest revision this PR passed the {self.friendly_name}.
+    """
+                self.update_pr(comment_text, args, create_new=False)
             return True
         elif len(diff) > 0:
-            comment_text = self.pr_comment_text_for_diff(diff)
-            self.update_pr(comment_text, args, create_new=True)
+            if should_update_gh:
+                comment_text = self.pr_comment_text_for_diff(diff)
+                self.update_pr(comment_text, args, create_new=True)
+            else:
+                print(
+                    f"Warning: {self.friendly_name}, {self.name} detected some issues with your code formatting..."
+                )
             return False
         else:
             # The formatter failed but didn't output a diff (e.g. some sort of
@@ -128,29 +150,47 @@ def filter_changed_files(self, changed_files: list[str]) -> list[str]:
                 filtered_files.append(path)
         return filtered_files
 
-    def format_run(
-        self, changed_files: list[str], args: argparse.Namespace
-    ) -> str | None:
+    @property
+    def clang_fmt_path(self) -> str:
+        if "CLANG_FORMAT_PATH" in os.environ:
+            return os.environ["CLANG_FORMAT_PATH"]
+        return "git-clang-format"
+
+    def has_tool(self) -> bool:
+        cmd = [self.clang_fmt_path, "-h"]
+        proc = None
+        try:
+            proc = subprocess.run(cmd, capture_output=True)
+        except:
+            return False
+        return proc.returncode == 0
+
+    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
         cpp_files = self.filter_changed_files(changed_files)
         if not cpp_files:
             return None
-        cf_cmd = [
-            "git-clang-format",
-            "--diff",
-            args.start_rev,
-            args.end_rev,
-            "--",
-        ] + cpp_files
-        print(f"Running: {' '.join(cf_cmd)}")
+
+        cf_cmd = [self.clang_fmt_path, "--diff"]
+
+        if args.start_rev and args.end_rev:
+            cf_cmd.append(args.start_rev)
+            cf_cmd.append(args.end_rev)
+
+        cf_cmd.append("--")
+        cf_cmd += cpp_files
+
+        if args.verbose:
+            print(f"Running: {' '.join(cf_cmd)}")
         self.cf_cmd = cf_cmd
         proc = subprocess.run(cf_cmd, capture_output=True)
         sys.stdout.write(proc.stderr.decode("utf-8"))
 
         if proc.returncode != 0:
             # formatting needed, or the command otherwise failed
-            print(f"error: {self.name} exited with code {proc.returncode}")
-            # Print the diff in the log so that it is viewable there
-            print(proc.stdout.decode("utf-8"))
+            if args.verbose:
+                print(f"error: {self.name} exited with code {proc.returncode}")
+                # Print the diff in the log so that it is viewable there
+                print(proc.stdout.decode("utf-8"))
             return proc.stdout.decode("utf-8")
         else:
             sys.stdout.write(proc.stdout.decode("utf-8"))
@@ -174,29 +214,46 @@ def filter_changed_files(self, changed_files: list[str]) -> list[str]:
 
         return filtered_files
 
-    def format_run(
-        self, changed_files: list[str], args: argparse.Namespace
-    ) -> str | None:
+    @property
+    def darker_fmt_path(self) -> str:
+        if "DARKER_FORMAT_PATH" in os.environ:
+            return os.environ["DARKER_FORMAT_PATH"]
+        return "darker"
+
+    def has_tool(self) -> bool:
+        cmd = [self.darker_fmt_path, "--version"]
+        proc = None
+        try:
+            proc = subprocess.run(cmd, capture_output=True)
+        except:
+            return False
+        return proc.returncode == 0
+
+    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
         py_files = self.filter_changed_files(changed_files)
         if not py_files:
             return None
         darker_cmd = [
-            "darker",
+            self.darker_fmt_path,
             "--check",
             "--diff",
-            "-r",
-            f"{args.start_rev}...{args.end_rev}",
-        ] + py_files
-        print(f"Running: {' '.join(darker_cmd)}")
+        ]
+        if args.start_rev and args.end_rev:
+            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
+        darker_cmd += py_files
+        if args.verbose:
+            print(f"Running: {' '.join(darker_cmd)}")
         self.darker_cmd = darker_cmd
         proc = subprocess.run(darker_cmd, capture_output=True)
-        sys.stdout.write(proc.stderr.decode("utf-8"))
+        if args.verbose:
+            sys.stdout.write(proc.stderr.decode("utf-8"))
 
         if proc.returncode != 0:
             # formatting needed, or the command otherwise failed
-            print(f"error: {self.name} exited with code {proc.returncode}")
-            # Print the diff in the log so that it is viewable there
-            print(proc.stdout.decode("utf-8"))
+            if args.verbose:
+                print(f"error: {self.name} exited with code {proc.returncode}")
+                # Print the diff in the log so that it is viewable there
+                print(proc.stdout.decode("utf-8"))
             return proc.stdout.decode("utf-8")
         else:
             sys.stdout.write(proc.stdout.decode("utf-8"))
@@ -205,7 +262,39 @@ def format_run(
 
 ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper())
 
+
+def hook_main():
+    # fill out args
+    args = FormatArgs()
+    args.verbose = False
+
+    # find the changed files
+    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
+    proc = subprocess.run(cmd, capture_output=True)
+    output = proc.stdout.decode("utf-8")
+    for line in output.splitlines():
+        args.changed_files.append(line)
+
+    failed_fmts = []
+    for fmt in ALL_FORMATTERS:
+        if fmt.has_tool():
+            if not fmt.run(args.changed_files, args):
+                failed_fmts.append(fmt.name)
+        else:
+            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
+
+    if len(failed_fmts) > 0:
+        sys.exit(1)
+
+    sys.exit(0)
+
+
 if __name__ == "__main__":
+    script_path = os.path.abspath(__file__)
+    if ".git/hooks" in script_path:  # and "GIT_DIR" in os.environ:
+        hook_main()
+        sys.exit(0)
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--token", type=str, required=True, help="GitHub authentiation token"
@@ -232,7 +321,7 @@ def format_run(
         help="Comma separated list of files that has been changed",
     )
 
-    args = parser.parse_args()
+    args = FormatArgs(parser.parse_args())
 
     changed_files = []
     if args.changed_files:

>From 4c2af2681341930be7071c84f09c8417b54cf392 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias at hieta.se>
Date: Mon, 4 Dec 2023 14:51:11 +0100
Subject: [PATCH 3/4] Code review fixes

---
 llvm/utils/git/code-format-helper.py | 54 +++++++++++++++++++---------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 409b8cc790efd4..024009ed8a51a0 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -1,25 +1,46 @@
 #!/usr/bin/env python3
 #
-# ====- code-format-helper, runs code formatters from the ci --*- python -*--==#
+# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
 #
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
-# ==-------------------------------------------------------------------------==#
+# ==--------------------------------------------------------------------------------------==#
 
 import argparse
 import os
 import subprocess
 import sys
-from functools import cached_property
+from typing import List, Optional
+
+"""
+This script is run by GitHub actions to ensure that the code in PR's conform to
+the coding style of LLVM. It can also be installed as a pre-commit git hook to
+check the coding style before submitting it. The canonical source of this script
+is in the LLVM source tree under llvm/utils/git.
+
+For C/C++ code it uses clang-format and for Python code it uses darker (which
+in turn invokes black).
+
+You can learn more about the LLVM coding style on llvm.org:
+https://llvm.org/docs/CodingStandards.html
+
+You can install this script as a git hook by symlinking it to the .git/hooks
+directory:
+
+ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit
+
+You can control the exact path to clang-format or darker with the following
+environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
+"""
 
 
 class FormatArgs:
     start_rev: str = None
     end_rev: str = None
     repo: str = None
-    changed_files: list[str] = []
+    changed_files: List[str] = []
     token: str = None
     verbose: bool = True
 
@@ -48,7 +69,7 @@ def instructions(self) -> str:
     def has_tool(self) -> bool:
         raise NotImplementedError()
 
-    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
+    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
         raise NotImplementedError()
 
     def pr_comment_text_for_diff(self, diff: str) -> str:
@@ -99,7 +120,7 @@ def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> No
         elif create_new:
             pr.as_issue().create_comment(comment_text)
 
-    def run(self, changed_files: list[str], args: FormatArgs) -> bool:
+    def run(self, changed_files: List[str], args: FormatArgs) -> bool:
         diff = self.format_run(changed_files, args)
         should_update_gh = args.token is not None and args.repo is not None
 
@@ -140,7 +161,7 @@ def instructions(self) -> str:
     def should_include_extensionless_file(self, path: str) -> bool:
         return path.startswith("libcxx/include")
 
-    def filter_changed_files(self, changed_files: list[str]) -> list[str]:
+    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
         filtered_files = []
         for path in changed_files:
             _, ext = os.path.splitext(path)
@@ -160,12 +181,12 @@ def has_tool(self) -> bool:
         cmd = [self.clang_fmt_path, "-h"]
         proc = None
         try:
-            proc = subprocess.run(cmd, capture_output=True)
+            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         except:
             return False
         return proc.returncode == 0
 
-    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
+    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
         cpp_files = self.filter_changed_files(changed_files)
         if not cpp_files:
             return None
@@ -182,7 +203,7 @@ def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
         if args.verbose:
             print(f"Running: {' '.join(cf_cmd)}")
         self.cf_cmd = cf_cmd
-        proc = subprocess.run(cf_cmd, capture_output=True)
+        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         sys.stdout.write(proc.stderr.decode("utf-8"))
 
         if proc.returncode != 0:
@@ -193,7 +214,6 @@ def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
                 print(proc.stdout.decode("utf-8"))
             return proc.stdout.decode("utf-8")
         else:
-            sys.stdout.write(proc.stdout.decode("utf-8"))
             return None
 
 
@@ -205,7 +225,7 @@ class DarkerFormatHelper(FormatHelper):
     def instructions(self) -> str:
         return " ".join(self.darker_cmd)
 
-    def filter_changed_files(self, changed_files: list[str]) -> list[str]:
+    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
         filtered_files = []
         for path in changed_files:
             name, ext = os.path.splitext(path)
@@ -224,12 +244,12 @@ def has_tool(self) -> bool:
         cmd = [self.darker_fmt_path, "--version"]
         proc = None
         try:
-            proc = subprocess.run(cmd, capture_output=True)
+            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         except:
             return False
         return proc.returncode == 0
 
-    def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
+    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
         py_files = self.filter_changed_files(changed_files)
         if not py_files:
             return None
@@ -244,7 +264,9 @@ def format_run(self, changed_files: list[str], args: FormatArgs) -> str | None:
         if args.verbose:
             print(f"Running: {' '.join(darker_cmd)}")
         self.darker_cmd = darker_cmd
-        proc = subprocess.run(darker_cmd, capture_output=True)
+        proc = subprocess.run(
+            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
         if args.verbose:
             sys.stdout.write(proc.stderr.decode("utf-8"))
 
@@ -270,7 +292,7 @@ def hook_main():
 
     # find the changed files
     cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
-    proc = subprocess.run(cmd, capture_output=True)
+    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     output = proc.stdout.decode("utf-8")
     for line in output.splitlines():
         args.changed_files.append(line)

>From cbee49a8ee65b4f33cd7d8065910fb0444bf6eee Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias at hieta.se>
Date: Mon, 11 Dec 2023 09:23:15 +0100
Subject: [PATCH 4/4] Some more nits fixed

---
 llvm/utils/git/code-format-helper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 024009ed8a51a0..aa78bc4f0ba9cd 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -99,6 +99,8 @@ def pr_comment_text_for_diff(self, diff: str) -> str:
 </details>
 """
 
+    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
+    # not require the github module to be installed everywhere.
     def find_comment(self, pr: any) -> any:
         for comment in pr.as_issue().get_comments():
             if self.comment_tag in comment.body:
@@ -313,7 +315,7 @@ def hook_main():
 
 if __name__ == "__main__":
     script_path = os.path.abspath(__file__)
-    if ".git/hooks" in script_path:  # and "GIT_DIR" in os.environ:
+    if ".git/hooks" in script_path:
         hook_main()
         sys.exit(0)
 



More information about the llvm-commits mailing list