[llvm] Add PR check to suggest alternatives to using undef (PR #118506)
Nuno Lopes via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 04:48:25 PST 2024
https://github.com/nunoplopes updated https://github.com/llvm/llvm-project/pull/118506
>From 726cf1db260b23b1391873df6752970173c5d2f0 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:06:52 +0000
Subject: [PATCH 01/10] Update code-format-helper.py
---
llvm/utils/git/code-format-helper.py | 77 +++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 76b2a3e26be28a..2ece9813ae447b 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -10,6 +10,7 @@
import argparse
import os
+import re
import subprocess
import sys
from typing import List, Optional
@@ -312,7 +313,81 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
return None
-ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper())
+class UndefGetFormatHelper(FormatHelper):
+ name = "undef deprecator"
+ friendly_name = "undef deprecator"
+
+ @property
+ def instructions(self) -> str:
+ return " ".join(self.cmd)
+
+ def filter_changed_files(self, changed_files: List[str]) -> List[str]:
+ filtered_files = []
+ for path in changed_files:
+ _, ext = os.path.splitext(path)
+ if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll"):
+ filtered_files.append(path)
+ return filtered_files
+
+ def has_tool(self) -> bool:
+ return True
+
+ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
+ files = self.filter_changed_files(changed_files)
+
+ regex = '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)'
+ cmd = ['git', 'diff', '-U0', '--pickaxe-regex', '-S', regex]
+
+ if args.start_rev and args.end_rev:
+ cmd.append(args.start_rev)
+ cmd.append(args.end_rev)
+
+ cmd += files
+
+ if args.verbose:
+ print(f"Running: {' '.join(f"'{c}'" for c in cmd)}")
+ self.cmd = cmd
+ proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sys.stdout.write(proc.stderr.decode("utf-8"))
+ stdout = proc.stdout.decode("utf-8")
+
+ files = []
+ for file in re.split('^diff --git ', stdout, 0, re.MULTILINE):
+ if re.search('^[+].*'+regex, file, re.MULTILINE):
+ files.append(re.match('a/([^ ]+)', file.splitlines()[0])[1])
+
+ if files:
+ files = '\n'.join(files)
+ report = f'''
+The following files introduce new uses of undef:
+{files}
+
+Undef is now deprecated and should only be used in the rare cases where no
+replacement is possible. For example, load of uninitialized memory yields undef.
+You should use poison values for placeholders instead.
+
+In tests, avoid using undef and having tests that trigger undefined behavior.
+If you need a value with some unimportant value, you can add a new argument
+to the function and use that instead.
+
+For example, this is considered a bad practice:
+define void @fn() {{
+ ...
+ br i1 undef, ...
+}}
+
+Use the following instead:
+define void @fn(i1 %cond) {{
+ ...
+ br i1 %cond, ...
+}}
+'''
+ return report
+ else:
+ return None
+
+
+ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())
def hook_main():
>From 211779a24857d1537969e56d2dd3bd81413eb7c7 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:07:32 +0000
Subject: [PATCH 02/10] Update pr-code-format.yml
---
.github/workflows/pr-code-format.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index f2bb37316d3a8b..15a309900ee85f 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -16,7 +16,7 @@ jobs:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
- if: github.repository == 'llvm/llvm-project'
+ if: github.repository == 'nunoplopes/llvm-project'
steps:
- name: Fetch LLVM sources
uses: actions/checkout@v4
>From 8aff8b1a07030bee929a549e3cb5c25d85832a1d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 14:16:30 +0000
Subject: [PATCH 03/10] Update code-format-helper.py
---
llvm/utils/git/code-format-helper.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 2ece9813ae447b..62fc98fa1be5ea 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -382,6 +382,9 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
br i1 %cond, ...
}}
'''
+ if args.verbose:
+ print(f"error: {self.name} failed")
+ print(report)
return report
else:
return None
>From 392e0879679a955f3c1b6c5de20d83cb8a79e7e0 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:38:37 +0000
Subject: [PATCH 04/10] Update pr-code-format.yml
---
.github/workflows/pr-code-format.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 15a309900ee85f..f2bb37316d3a8b 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -16,7 +16,7 @@ jobs:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
- if: github.repository == 'nunoplopes/llvm-project'
+ if: github.repository == 'llvm/llvm-project'
steps:
- name: Fetch LLVM sources
uses: actions/checkout@v4
>From a073c96127d9a8762c061a40048b362c96483f84 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:49:30 +0000
Subject: [PATCH 05/10] Update code-format-helper.py
---
llvm/utils/git/code-format-helper.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 62fc98fa1be5ea..7ba19dbb95a82f 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -345,7 +345,8 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
cmd += files
if args.verbose:
- print(f"Running: {' '.join(f"'{c}'" for c in cmd)}")
+ cmd_str = ' '.join(f"'{c}'" for c in cmd)
+ print(f"Running: {cmd_str}")
self.cmd = cmd
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sys.stdout.write(proc.stderr.decode("utf-8"))
>From a1f3ad4d70203fe7b9a7e91d98b8da482c7119dc Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 15:54:41 +0000
Subject: [PATCH 06/10] Update code-format-helper.py
---
llvm/utils/git/code-format-helper.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 7ba19dbb95a82f..6a7658f85099d5 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -345,7 +345,7 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
cmd += files
if args.verbose:
- cmd_str = ' '.join(f"'{c}'" for c in cmd)
+ cmd_str = " ".join(f"'{c}'" for c in cmd)
print(f"Running: {cmd_str}")
self.cmd = cmd
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
>From 5ea70dccea6c2f4dcd1dd58a15cc26f1084b963d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Tue, 3 Dec 2024 16:05:44 +0000
Subject: [PATCH 07/10] wrap command in quotes to prevent issues when
copy-pasting to run locally
---
llvm/utils/git/code-format-helper.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 6a7658f85099d5..9182515edd0843 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -319,7 +319,7 @@ class UndefGetFormatHelper(FormatHelper):
@property
def instructions(self) -> str:
- return " ".join(self.cmd)
+ return " ".join(f"'{c}'" for c in self.cmd)
def filter_changed_files(self, changed_files: List[str]) -> List[str]:
filtered_files = []
@@ -343,11 +343,10 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
cmd.append(args.end_rev)
cmd += files
+ self.cmd = cmd
if args.verbose:
- cmd_str = " ".join(f"'{c}'" for c in cmd)
- print(f"Running: {cmd_str}")
- self.cmd = cmd
+ print(f"Running: {self.instructions}")
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sys.stdout.write(proc.stderr.decode("utf-8"))
stdout = proc.stdout.decode("utf-8")
>From d11783d41577b3ddfeb00942d36e0105a477988d Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Wed, 4 Dec 2024 14:38:45 +0000
Subject: [PATCH 08/10] update with review comments
---
llvm/utils/git/code-format-helper.py | 79 +++++++++++++++++++---------
1 file changed, 53 insertions(+), 26 deletions(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 9182515edd0843..841c3ac007fa5a 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -11,6 +11,7 @@
import argparse
import os
import re
+import shlex
import subprocess
import sys
from typing import List, Optional
@@ -319,7 +320,7 @@ class UndefGetFormatHelper(FormatHelper):
@property
def instructions(self) -> str:
- return " ".join(f"'{c}'" for c in self.cmd)
+ return " ".join(shlex.quote(c) for c in self.cmd)
def filter_changed_files(self, changed_files: List[str]) -> List[str]:
filtered_files = []
@@ -332,11 +333,30 @@ def filter_changed_files(self, changed_files: List[str]) -> List[str]:
def has_tool(self) -> bool:
return True
+ def pr_comment_text_for_diff(self, diff: str) -> str:
+ return f"""
+:warning: {self.name} found issues in your code. :warning:
+
+<details>
+<summary>
+You can test this locally with the following command:
+</summary>
+
+``````````bash
+{self.instructions}
+``````````
+
+</details>
+
+{diff}
+"""
+
def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
files = self.filter_changed_files(changed_files)
- regex = '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)'
- cmd = ['git', 'diff', '-U0', '--pickaxe-regex', '-S', regex]
+ # Use git to find files that have had a change in the number of undefs
+ regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
+ cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]
if args.start_rev and args.end_rev:
cmd.append(args.start_rev)
@@ -347,47 +367,54 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
if args.verbose:
print(f"Running: {self.instructions}")
- proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- sys.stdout.write(proc.stderr.decode("utf-8"))
- stdout = proc.stdout.decode("utf-8")
+
+ proc = subprocess.run(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
+ )
+ sys.stdout.write(proc.stderr)
+ stdout = proc.stdout
files = []
- for file in re.split('^diff --git ', stdout, 0, re.MULTILINE):
- if re.search('^[+].*'+regex, file, re.MULTILINE):
- files.append(re.match('a/([^ ]+)', file.splitlines()[0])[1])
+ # Split the diff so we have one array entry per file.
+ # Each file is prefixed like:
+ # diff --git a/file b/file
+ for file in re.split("^diff --git ", stdout, 0, re.MULTILINE):
+ # search for additions of undef
+ if re.search("^[+].*" + regex, file, re.MULTILINE):
+ files.append(re.match("a/([^ ]+)", file.splitlines()[0])[1])
+
+ if not files:
+ return None
- if files:
- files = '\n'.join(files)
- report = f'''
+ files = "\n".join(" - " + f for f in files)
+ report = f"""
The following files introduce new uses of undef:
{files}
-Undef is now deprecated and should only be used in the rare cases where no
-replacement is possible. For example, load of uninitialized memory yields undef.
-You should use poison values for placeholders instead.
+Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
-In tests, avoid using undef and having tests that trigger undefined behavior.
-If you need a value with some unimportant value, you can add a new argument
-to the function and use that instead.
+In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.
For example, this is considered a bad practice:
+```llvm
define void @fn() {{
...
br i1 undef, ...
}}
+```
-Use the following instead:
+Please use the following instead:
+```llvm
define void @fn(i1 %cond) {{
...
br i1 %cond, ...
}}
-'''
- if args.verbose:
- print(f"error: {self.name} failed")
- print(report)
- return report
- else:
- return None
+```
+"""
+ if args.verbose:
+ print(f"error: {self.name} failed")
+ print(report)
+ return report
ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())
>From 8a54f0431c713d78e96e3b45fd2f3980be7945dd Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Thu, 5 Dec 2024 16:17:05 +0000
Subject: [PATCH 09/10] add langref link
---
llvm/utils/git/code-format-helper.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 841c3ac007fa5a..5aadd9f4b2c400 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -391,7 +391,7 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
The following files introduce new uses of undef:
{files}
-Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
+[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.
In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.
>From d7befbb080dc69c3fff307161adccbe962d2adda Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes at tecnico.ulisboa.pt>
Date: Wed, 11 Dec 2024 12:48:11 +0000
Subject: [PATCH 10/10] add link to the new reference manual
---
llvm/utils/git/code-format-helper.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 5aadd9f4b2c400..19264bca6ce8f6 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -410,6 +410,8 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
br i1 %cond, ...
}}
```
+
+Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.
"""
if args.verbose:
print(f"error: {self.name} failed")
More information about the llvm-commits
mailing list