[llvm] Add PR check to suggest alternatives to using undef (PR #118506)

Nuno Lopes via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 11 04:48:25 PST 2024


https://github.com/nunoplopes updated https://github.com/llvm/llvm-project/pull/118506

>From 726cf1db260b23b1391873df6752970173c5d2f0 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:06:52 +0000
Subject: [PATCH 01/10] Update code-format-helper.py

---
 llvm/utils/git/code-format-helper.py | 77 +++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 76b2a3e26be28a..2ece9813ae447b 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -10,6 +10,7 @@
 
 import argparse
 import os
+import re
 import subprocess
 import sys
 from typing import List, Optional
@@ -312,7 +313,81 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
             return None
 
 
-ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper())
+class UndefGetFormatHelper(FormatHelper):
+    name = "undef deprecator"
+    friendly_name = "undef deprecator"
+
+    @property
+    def instructions(self) -> str:
+        return " ".join(self.cmd)
+
+    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
+        filtered_files = []
+        for path in changed_files:
+            _, ext = os.path.splitext(path)
+            if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll"):
+                filtered_files.append(path)
+        return filtered_files
+
+    def has_tool(self) -> bool:
+        return True
+
+    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
+        files = self.filter_changed_files(changed_files)
+
+        regex = '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)'
+        cmd = ['git', 'diff', '-U0', '--pickaxe-regex', '-S', regex]
+
+        if args.start_rev and args.end_rev:
+            cmd.append(args.start_rev)
+            cmd.append(args.end_rev)
+
+        cmd += files
+
+        if args.verbose:
+            print(f"Running: {' '.join(f"'{c}'" for c in cmd)}")
+        self.cmd = cmd
+        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sys.stdout.write(proc.stderr.decode("utf-8"))
+        stdout = proc.stdout.decode("utf-8")
+
+        files = []
+        for file in re.split('^diff --git ', stdout, 0, re.MULTILINE):
+            if re.search('^[+].*'+regex, file, re.MULTILINE):
+                files.append(re.match('a/([^ ]+)', file.splitlines()[0])[1])
+
+        if files:
+            files = '\n'.join(files)
+            report = f'''
+The following files introduce new uses of undef:
+{files}
+
+Undef is now deprecated and should only be used in the rare cases where no
+replacement is possible. For example, load of uninitialized memory yields undef.
+You should use poison values for placeholders instead.
+
+In tests, avoid using undef and having tests that trigger undefined behavior.
+If you need a value with some unimportant value, you can add a new argument
+to the function and use that instead.
+
+For example, this is considered a bad practice:
+define void @fn() {{
+  ...
+  br i1 undef, ...
+}}
+
+Use the following instead:
+define void @fn(i1 %cond) {{
+  ...
+  br i1 %cond, ...
+}}
+'''
+            return report
+        else:
+            return None
+
+
+ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())
 
 
 def hook_main():

>From 211779a24857d1537969e56d2dd3bd81413eb7c7 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:07:32 +0000
Subject: [PATCH 02/10] Update pr-code-format.yml

---
 .github/workflows/pr-code-format.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index f2bb37316d3a8b..15a309900ee85f 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -16,7 +16,7 @@ jobs:
     concurrency:
       group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
       cancel-in-progress: true
-    if: github.repository == 'llvm/llvm-project'
+    if: github.repository == 'nunoplopes/llvm-project'
     steps:
       - name: Fetch LLVM sources
         uses: actions/checkout@v4

>From 8aff8b1a07030bee929a549e3cb5c25d85832a1d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:16:30 +0000
Subject: [PATCH 03/10] Update code-format-helper.py

---
 llvm/utils/git/code-format-helper.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 2ece9813ae447b..62fc98fa1be5ea 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -382,6 +382,9 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
   br i1 %cond, ...
 }}
 '''
+            if args.verbose:
+                print(f"error: {self.name} failed")
+                print(report)
             return report
         else:
             return None

>From 392e0879679a955f3c1b6c5de20d83cb8a79e7e0 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:38:37 +0000
Subject: [PATCH 04/10] Update pr-code-format.yml

---
 .github/workflows/pr-code-format.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 15a309900ee85f..f2bb37316d3a8b 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -16,7 +16,7 @@ jobs:
     concurrency:
       group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
       cancel-in-progress: true
-    if: github.repository == 'nunoplopes/llvm-project'
+    if: github.repository == 'llvm/llvm-project'
     steps:
       - name: Fetch LLVM sources
         uses: actions/checkout@v4

>From a073c96127d9a8762c061a40048b362c96483f84 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:49:30 +0000
Subject: [PATCH 05/10] Update code-format-helper.py

---
 llvm/utils/git/code-format-helper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 62fc98fa1be5ea..7ba19dbb95a82f 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -345,7 +345,8 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
         cmd += files
 
         if args.verbose:
-            print(f"Running: {' '.join(f"'{c}'" for c in cmd)}")
+            cmd_str = ' '.join(f"'{c}'" for c in cmd)
+            print(f"Running: {cmd_str}")
         self.cmd = cmd
         proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         sys.stdout.write(proc.stderr.decode("utf-8"))

>From a1f3ad4d70203fe7b9a7e91d98b8da482c7119dc Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:54:41 +0000
Subject: [PATCH 06/10] Update code-format-helper.py

---
 llvm/utils/git/code-format-helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 7ba19dbb95a82f..6a7658f85099d5 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -345,7 +345,7 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
         cmd += files
 
         if args.verbose:
-            cmd_str = ' '.join(f"'{c}'" for c in cmd)
+            cmd_str = " ".join(f"'{c}'" for c in cmd)
             print(f"Running: {cmd_str}")
         self.cmd = cmd
         proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

>From 5ea70dccea6c2f4dcd1dd58a15cc26f1084b963d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 16:05:44 +0000
Subject: [PATCH 07/10] wrap command in quotes to prevent issues when
 copy-pasting to run locally

---
 llvm/utils/git/code-format-helper.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 6a7658f85099d5..9182515edd0843 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -319,7 +319,7 @@ class UndefGetFormatHelper(FormatHelper):
 
     @property
     def instructions(self) -> str:
-        return " ".join(self.cmd)
+        return " ".join(f"'{c}'" for c in self.cmd)
 
     def filter_changed_files(self, changed_files: List[str]) -> List[str]:
         filtered_files = []
@@ -343,11 +343,10 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
             cmd.append(args.end_rev)
 
         cmd += files
+        self.cmd = cmd
 
         if args.verbose:
-            cmd_str = " ".join(f"'{c}'" for c in cmd)
-            print(f"Running: {cmd_str}")
-        self.cmd = cmd
+            print(f"Running: {self.instructions}")
         proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         sys.stdout.write(proc.stderr.decode("utf-8"))
         stdout = proc.stdout.decode("utf-8")

>From d11783d41577b3ddfeb00942d36e0105a477988d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Wed, 4 Dec 2024 14:38:45 +0000
Subject: [PATCH 08/10] update with review comments

---
 llvm/utils/git/code-format-helper.py | 79 +++++++++++++++++++---------
 1 file changed, 53 insertions(+), 26 deletions(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 9182515edd0843..841c3ac007fa5a 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -11,6 +11,7 @@
 import argparse
 import os
 import re
+import shlex
 import subprocess
 import sys
 from typing import List, Optional
@@ -319,7 +320,7 @@ class UndefGetFormatHelper(FormatHelper):
 
     @property
     def instructions(self) -> str:
-        return " ".join(f"'{c}'" for c in self.cmd)
+        return " ".join(shlex.quote(c) for c in self.cmd)
 
     def filter_changed_files(self, changed_files: List[str]) -> List[str]:
         filtered_files = []
@@ -332,11 +333,30 @@ def filter_changed_files(self, changed_files: List[str]) -> List[str]:
     def has_tool(self) -> bool:
         return True
 
+    def pr_comment_text_for_diff(self, diff: str) -> str:
+        return f"""
+:warning: {self.name} found issues in your code. :warning:
+
+<details>
+<summary>
+You can test this locally with the following command:
+</summary>
+
+``````````bash
+{self.instructions}
+``````````
+
+</details>
+
+{diff}
+"""
+
     def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
         files = self.filter_changed_files(changed_files)
 
-        regex = '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)'
-        cmd = ['git', 'diff', '-U0', '--pickaxe-regex', '-S', regex]
+        # Use git to find files that have had a change in the number of undefs
+        regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
+        cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]
 
         if args.start_rev and args.end_rev:
             cmd.append(args.start_rev)
@@ -347,47 +367,54 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
 
         if args.verbose:
             print(f"Running: {self.instructions}")
-        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        sys.stdout.write(proc.stderr.decode("utf-8"))
-        stdout = proc.stdout.decode("utf-8")
+
+        proc = subprocess.run(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
+        )
+        sys.stdout.write(proc.stderr)
+        stdout = proc.stdout
 
         files = []
-        for file in re.split('^diff --git ', stdout, 0, re.MULTILINE):
-            if re.search('^[+].*'+regex, file, re.MULTILINE):
-                files.append(re.match('a/([^ ]+)', file.splitlines()[0])[1])
+        # Split the diff so we have one array entry per file.
+        # Each file is prefixed like:
+        # diff --git a/file b/file
+        for file in re.split("^diff --git ", stdout, 0, re.MULTILINE):
+            # search for additions of undef
+            if re.search("^[+].*" + regex, file, re.MULTILINE):
+                files.append(re.match("a/([^ ]+)", file.splitlines()[0])[1])
+
+        if not files:
+            return None
 
-        if files:
-            files = '\n'.join(files)
-            report = f'''
+        files = "\n".join(" - " + f for f in files)
+        report = f"""
 The following files introduce new uses of undef:
 {files}
 
-Undef is now deprecated and should only be used in the rare cases where no
-replacement is possible. For example, load of uninitialized memory yields undef.
-You should use poison values for placeholders instead.
+Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
 
-In tests, avoid using undef and having tests that trigger undefined behavior.
-If you need a value with some unimportant value, you can add a new argument
-to the function and use that instead.
+In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.
 
 For example, this is considered a bad practice:
+```llvm
 define void @fn() {{
   ...
   br i1 undef, ...
 }}
+```
 
-Use the following instead:
+Please use the following instead:
+```llvm
 define void @fn(i1 %cond) {{
   ...
   br i1 %cond, ...
 }}
-'''
-            if args.verbose:
-                print(f"error: {self.name} failed")
-                print(report)
-            return report
-        else:
-            return None
+```
+"""
+        if args.verbose:
+            print(f"error: {self.name} failed")
+            print(report)
+        return report
 
 
 ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())

>From 8a54f0431c713d78e96e3b45fd2f3980be7945dd Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Thu, 5 Dec 2024 16:17:05 +0000
Subject: [PATCH 09/10] add langref link

---
 llvm/utils/git/code-format-helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 841c3ac007fa5a..5aadd9f4b2c400 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -391,7 +391,7 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
 The following files introduce new uses of undef:
 {files}
 
-Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
+[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
 
 In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.
 

>From d7befbb080dc69c3fff307161adccbe962d2adda Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Wed, 11 Dec 2024 12:48:11 +0000
Subject: [PATCH 10/10] add link to the new reference manual

---
 llvm/utils/git/code-format-helper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 5aadd9f4b2c400..19264bca6ce8f6 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -410,6 +410,8 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
   br i1 %cond, ...
 }}
 ```
+
+Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.
 """
         if args.verbose:
             print(f"error: {self.name} failed")



More information about the llvm-commits mailing list