[llvm] [NFC] Add a script to check for and fix trailing whitespace (PR #123496)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 19 14:04:44 PST 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {darker}-->
:warning: The Python code formatter, darker, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
darker --check --diff -r 1797fb6b233c548817008b9c39c6af06d12cae99...41956f3ec4b60f4005c3197706fd2690cb284414 llvm/utils/trailing_whitespace.py llvm/utils/trailing_whitespace_test.py llvm/utils/git/code-format-helper.py
``````````
</details>
<details>
<summary>
View the diff from darker here.
</summary>
``````````diff
--- git/code-format-helper.py 2025-01-19 06:43:23.000000 +0000
+++ git/code-format-helper.py 2025-01-19 22:04:13.801177 +0000
@@ -427,11 +427,13 @@
name = "trailing_whitespace"
friendly_name = "Trailing whitespace formatter"
@property
def instructions(self) -> str:
- return f"python3 {self.trailing_whitespace_path} --exclude build " + " ".join(self.files)
+ return f"python3 {self.trailing_whitespace_path} --exclude build " + " ".join(
+ self.files
+ )
@property
def trailing_whitespace_path(self) -> str:
if "TRAILING_WHITESPACE" in os.environ:
return os.environ["TRAILING_WHITESPACE"]
@@ -468,11 +470,16 @@
else:
sys.stdout.write(proc.stdout.decode("utf-8"))
return None
-ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper(), TrailingWhitespaceFormatter())
+ALL_FORMATTERS = (
+ DarkerFormatHelper(),
+ ClangFormatHelper(),
+ UndefGetFormatHelper(),
+ TrailingWhitespaceFormatter(),
+)
def hook_main():
# fill out args
args = FormatArgs()
--- trailing_whitespace.py 2025-01-19 06:43:23.000000 +0000
+++ trailing_whitespace.py 2025-01-19 22:04:13.870463 +0000
@@ -1,10 +1,11 @@
import argparse
import os
import re
import subprocess
import sys
+
def is_text(file):
text_file_extensions = {
".apinotes",
".asm",
@@ -69,10 +70,11 @@
".yaml",
}
_, ext = os.path.splitext(file)
return ext.lower() in text_file_extensions
+
def check_file(path, fix):
try:
trailing = False
with open(path, "r", encoding="utf-8", errors="ignore") as f:
for line_number, line in enumerate(f, 1):
@@ -89,19 +91,22 @@
print(f"Warning: Encoding error encountered for {path}")
except FileNotFoundError:
print(f"Warning: Could not open {path}")
return False
+
def check_paths(paths, exclude, fix):
exclude = [os.path.abspath(d) for d in exclude]
seen = set()
found_trailing = False
for path in paths:
if os.path.abspath(path) in exclude:
continue
for root, dirs, files in os.walk(path):
- dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(root, d)) not in exclude]
+ dirs[:] = [
+ d for d in dirs if os.path.abspath(os.path.join(root, d)) not in exclude
+ ]
for file in files:
file_path = os.path.join(root, file)
if not is_text(file):
continue
if file_path in seen:
@@ -109,12 +114,14 @@
seen.add(file_path)
if check_file(file_path, fix):
found_trailing = True
return found_trailing
+
HUNK_HEADER_REGEX = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
DIFF_HEADER_REGEX = re.compile(r"^diff --git a/(.+) b/(.+)")
+
def parse_diffs(diff_text):
diffs: dict[str, list[tuple[int, str]]] = dict()
file = None
current_line = None
@@ -128,17 +135,18 @@
continue
match = HUNK_HEADER_REGEX.match(line)
if match:
current_line = int(match.groups()[2])
continue
- if not current_line: # haven't seen the hunk header yet, continue
+ if not current_line: # haven't seen the hunk header yet, continue
continue
if line.startswith("+"):
line = line[1:]
diffs[file].append((current_line, line))
current_line += 1
return diffs
+
def check_paths_diff(paths, exclude, rev_start, rev_end):
exclude = [os.path.abspath(d) for d in exclude]
cmd = ["git", "diff", "-U0", rev_start, rev_end, *paths]
proc = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8")
@@ -154,22 +162,26 @@
if line.endswith(" "):
print(f"{file}:{num}: Trailing whitespace found")
found_trailing = True
return found_trailing
+
if __name__ == "__main__":
script_path = os.path.abspath(__file__)
parser = argparse.ArgumentParser()
parser.add_argument(
- "--fix", action=argparse.BooleanOptionalAction, default=False, help="Automatically apply fixes"
+ "--fix",
+ action=argparse.BooleanOptionalAction,
+ default=False,
+ help="Automatically apply fixes",
)
parser.add_argument(
"--exclude",
action="append",
default=[],
- help="Paths to exclude. Can be used multiple times."
+ help="Paths to exclude. Can be used multiple times.",
)
parser.add_argument(
"--diff",
help="Compute only based on changed lines (format: rev_start..rev_end)",
)
--- trailing_whitespace_test.py 2025-01-19 06:43:23.000000 +0000
+++ trailing_whitespace_test.py 2025-01-19 22:04:13.890832 +0000
@@ -1,8 +1,9 @@
import unittest
import trailing_whitespace
+
class Test(unittest.TestCase):
"""Tests for trailing_whitespace."""
def test_is_text(self) -> None:
@@ -18,42 +19,61 @@
+++ b/clang/test/CodeGen/memalign-libcall.c
@@ -12 +12,2 @@ void *test(size_t alignment, size_t size) {
-// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }
\\ No newline at end of file
+// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }"""
- self.assertEqual(trailing_whitespace.parse_diffs(basic_diff), {
- 'clang/test/CodeGen/memalign-libcall.c': [
- (12, "// CHECK: attributes #2 = { nobuiltin \"no-builtin-memalign\" }")
- ]
- })
+ self.assertEqual(
+ trailing_whitespace.parse_diffs(basic_diff),
+ {
+ "clang/test/CodeGen/memalign-libcall.c": [
+ (
+ 12,
+ '// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }',
+ )
+ ]
+ },
+ )
multiple_added_lines = """diff --git a/clang/test/CodeGen/memalign-libcall.c b/clang/test/CodeGen/memalign-libcall.c
index 2070eebdbf84..4fe1a838d15f 100644
--- a/clang/test/CodeGen/memalign-libcall.c
+++ b/clang/test/CodeGen/memalign-libcall.c
@@ -12 +12,2 @@ void *test(size_t alignment, size_t size) {
-// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }
\\ No newline at end of file
+// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }
+foobar"""
- self.assertEqual(trailing_whitespace.parse_diffs(multiple_added_lines), {
- 'clang/test/CodeGen/memalign-libcall.c': [
- (12, "// CHECK: attributes #2 = { nobuiltin \"no-builtin-memalign\" }"),
- (13, "foobar"),
- ]
- })
+ self.assertEqual(
+ trailing_whitespace.parse_diffs(multiple_added_lines),
+ {
+ "clang/test/CodeGen/memalign-libcall.c": [
+ (
+ 12,
+ '// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }',
+ ),
+ (13, "foobar"),
+ ]
+ },
+ )
multiple_deleted = """diff --git a/clang/test/CodeGen/memalign-libcall.c b/clang/test/CodeGen/memalign-libcall.c
index 2070eebdbf84..4fe1a838d15f 100644
--- a/clang/test/CodeGen/memalign-libcall.c
+++ b/clang/test/CodeGen/memalign-libcall.c
@@ -12 +12,2 @@ void *test(size_t alignment, size_t size) {
-foobar
-// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }
\\ No newline at end of file
+// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }"""
- self.assertEqual(trailing_whitespace.parse_diffs(multiple_deleted), {
- 'clang/test/CodeGen/memalign-libcall.c': [
- (12, "// CHECK: attributes #2 = { nobuiltin \"no-builtin-memalign\" }"),
- ]
- })
+ self.assertEqual(
+ trailing_whitespace.parse_diffs(multiple_deleted),
+ {
+ "clang/test/CodeGen/memalign-libcall.c": [
+ (
+ 12,
+ '// CHECK: attributes #2 = { nobuiltin "no-builtin-memalign" }',
+ ),
+ ]
+ },
+ )
+
if __name__ == "__main__":
unittest.main()
``````````
</details>
https://github.com/llvm/llvm-project/pull/123496
More information about the llvm-commits
mailing list