[llvm] 0a53f43 - [utils] support "Reverts ${PR}" in commit messages (#112226)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 16 14:39:40 PDT 2024
Author: George Burgess IV
Date: 2024-10-16T14:39:36-07:00
New Revision: 0a53f43c0c7e33cde07b24169e8f45db7eba2fea
URL: https://github.com/llvm/llvm-project/commit/0a53f43c0c7e33cde07b24169e8f45db7eba2fea
DIFF: https://github.com/llvm/llvm-project/commit/0a53f43c0c7e33cde07b24169e8f45db7eba2fea.diff
LOG: [utils] support "Reverts ${PR}" in commit messages (#112226)
A bisection in ChromeOS ended at a reverted commit, which wasn't flagged
by this revert checking script, since it used `Reverts ${PR}` rather
than `This reverts commit ${SHA}`.
`grep` says that somewhere around 400 reverts in the last year have used
`Reverts ${PR}` syntax. Support it.
Tested in part by running the command that was expected to catch this
revert:
```
$ ./revert_checker.py -C ~/llvm/main/ \
3b5e7c83a6e226d5bd7ed2e9b67449b64812074c origin/main \
| grep -q 4b0276d1c9cb558f3c20736dce802ceb26c0b958
$ echo $?
0
```
Added:
Modified:
llvm/utils/revert_checker.py
llvm/utils/revert_checker_test.py
Removed:
################################################################################
diff --git a/llvm/utils/revert_checker.py b/llvm/utils/revert_checker.py
index da80bdff868571..b1c6e228e4d419 100755
--- a/llvm/utils/revert_checker.py
+++ b/llvm/utils/revert_checker.py
@@ -45,35 +45,78 @@
import re
import subprocess
import sys
-from typing import Generator, List, NamedTuple, Iterable
+from typing import Dict, Generator, Iterable, List, NamedTuple, Optional, Tuple
assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."
# People are creative with their reverts, and heuristics are a bit
diff icult.
-# Like 90% of of reverts have "This reverts commit ${full_sha}".
-# Some lack that entirely, while others have many of them specified in ad-hoc
-# ways, while others use short SHAs and whatever.
+# At a glance, most reverts have "This reverts commit ${full_sha}". Many others
+# have `Reverts llvm/llvm-project#${PR_NUMBER}`.
#
-# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
-# starts involving human intervention, which is probably not worth it for now.
+# By their powers combined, we should be able to automatically catch something
+# like 80% of reverts with reasonable confidence. At some point, human
+# intervention will always be required (e.g., I saw
+# ```
+# This reverts commit ${commit_sha_1} and
+# also ${commit_sha_2_shorthand}
+# ```
+# during my sample)
+
+_CommitMessageReverts = NamedTuple(
+ "_CommitMessageReverts",
+ [
+ ("potential_shas", List[str]),
+ ("potential_pr_numbers", List[int]),
+ ],
+)
+
+def _try_parse_reverts_from_commit_message(
+ commit_message: str,
+) -> _CommitMessageReverts:
+ """Tries to parse revert SHAs and LLVM PR numbers form the commit message.
-def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
+ Returns:
+ A namedtuple containing:
+ - A list of potentially reverted SHAs
+ - A list of potentially reverted LLVM PR numbers
+ """
if not commit_message:
- return []
+ return _CommitMessageReverts([], [])
- results = re.findall(r"This reverts commit ([a-f0-9]{40})\b", commit_message)
+ sha_reverts = re.findall(
+ r"This reverts commit ([a-f0-9]{40})\b",
+ commit_message,
+ )
first_line = commit_message.splitlines()[0]
initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
if initial_revert:
- results.append(initial_revert.group(1))
- return results
+ sha_reverts.append(initial_revert.group(1))
+ pr_numbers = [
+ int(x)
+ for x in re.findall(
+ r"Reverts llvm/llvm-project#(\d+)",
+ commit_message,
+ )
+ ]
+
+ return _CommitMessageReverts(
+ potential_shas=sha_reverts,
+ potential_pr_numbers=pr_numbers,
+ )
-def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
+
+def _stream_stdout(
+ command: List[str], cwd: Optional[str] = None
+) -> Generator[str, None, None]:
with subprocess.Popen(
- command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace"
+ command,
+ cwd=cwd,
+ stdout=subprocess.PIPE,
+ encoding="utf-8",
+ errors="replace",
) as p:
assert p.stdout is not None # for mypy's happiness.
yield from p.stdout
@@ -175,10 +218,43 @@ def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
).strip()
-def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
+def _load_pr_commit_mappings(
+ git_dir: str, root: str, min_ref: str
+) -> Dict[int, List[str]]:
+ git_log = ["git", "log", "--format=%H %s", f"{min_ref}..{root}"]
+ results = collections.defaultdict(list)
+ pr_regex = re.compile(r"\s\(#(\d+)\)$")
+ for line in _stream_stdout(git_log, cwd=git_dir):
+ m = pr_regex.search(line)
+ if not m:
+ continue
+
+ pr_number = int(m.group(1))
+ sha = line.split(None, 1)[0]
+ # N.B., these are kept in log (read: reverse chronological) order,
+ # which is what's expected by `find_reverts`.
+ results[pr_number].append(sha)
+ return results
+
+
+# N.B., max_pr_lookback's default of 20K commits is arbitrary, but should be
+# enough for the 99% case of reverts: rarely should someone land a cleanish
+# revert of a >6 month old change...
+def find_reverts(
+ git_dir: str, across_ref: str, root: str, max_pr_lookback: int = 20000
+) -> List[Revert]:
"""Finds reverts across `across_ref` in `git_dir`, starting from `root`.
These reverts are returned in order of oldest reverts first.
+
+ Args:
+ git_dir: git directory to find reverts in.
+ across_ref: the ref to find reverts across.
+ root: the 'main' ref to look for reverts on.
+ max_pr_lookback: this function uses heuristics to map PR numbers to
+ SHAs. These heuristics require that commit history from `root` to
+ `some_parent_of_root` is loaded in memory. `max_pr_lookback` is how
+ many commits behind `across_ref` should be loaded in memory.
"""
across_sha = _rev_parse(git_dir, across_ref)
root_sha = _rev_parse(git_dir, root)
@@ -201,8 +277,41 @@ def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
)
all_reverts = []
+ # Lazily load PR <-> commit mappings, since it can be expensive.
+ pr_commit_mappings = None
for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
- reverts = _try_parse_reverts_from_commit_message(commit_message)
+ reverts, pr_reverts = _try_parse_reverts_from_commit_message(
+ commit_message,
+ )
+ if pr_reverts:
+ if pr_commit_mappings is None:
+ logging.info(
+ "Loading PR <-> commit mappings. This may take a moment..."
+ )
+ pr_commit_mappings = _load_pr_commit_mappings(
+ git_dir, root_sha, f"{across_sha}~{max_pr_lookback}"
+ )
+ logging.info(
+ "Loaded %d PR <-> commit mappings", len(pr_commit_mappings)
+ )
+
+ for reverted_pr_number in pr_reverts:
+ reverted_shas = pr_commit_mappings.get(reverted_pr_number)
+ if not reverted_shas:
+ logging.warning(
+ "No SHAs for reverted PR %d (commit %s)",
+ reverted_pr_number,
+ sha,
+ )
+ continue
+ logging.debug(
+ "Inferred SHAs %s for reverted PR %d (commit %s)",
+ reverted_shas,
+ reverted_pr_number,
+ sha,
+ )
+ reverts.extend(reverted_shas)
+
if not reverts:
continue
diff --git a/llvm/utils/revert_checker_test.py b/llvm/utils/revert_checker_test.py
index 9d992663c5be8a..c149be8dc0dd19 100755
--- a/llvm/utils/revert_checker_test.py
+++ b/llvm/utils/revert_checker_test.py
@@ -96,6 +96,7 @@ def test_reverted_noncommit_object_is_a_nop(self) -> None:
git_dir=get_llvm_project_path(),
across_ref="c9944df916e41b1014dff5f6f75d52297b48ecdc~",
root="c9944df916e41b1014dff5f6f75d52297b48ecdc",
+ max_pr_lookback=50,
)
self.assertEqual(reverts, [])
@@ -113,6 +114,7 @@ def test_known_reverts_across_arbitrary_llvm_rev(self) -> None:
git_dir=get_llvm_project_path(),
across_ref="c47f971694be0159ffddfee8a75ae515eba91439",
root="9f981e9adf9c8d29bb80306daf08d2770263ade6",
+ max_pr_lookback=50,
)
self.assertEqual(
reverts,
@@ -128,6 +130,27 @@ def test_known_reverts_across_arbitrary_llvm_rev(self) -> None:
],
)
+ def test_pr_based_revert_works(self) -> None:
+ reverts = revert_checker.find_reverts(
+ git_dir=get_llvm_project_path(),
+ # This SHA is a direct child of the reverted SHA expected below.
+ across_ref="2d5f3b0a61fb171617012a2c3ba05fd31fb3bb1d",
+ # This SHA is a direct child of the revert SHA listed below.
+ root="2c01b278580212914ec037bb5dd9b73702dfe7f1",
+ max_pr_lookback=50,
+ )
+ self.assertEqual(
+ reverts,
+ [
+ revert_checker.Revert(
+ # This SHA is a `Reverts ${PR}` for #111004.
+ sha="50866e84d1da8462aeb96607bf6d9e5bbd5869c5",
+ # ...And this was the commit for #111004.
+ reverted_sha="67160c5ab5f5b7fd5fa7851abcfde367c8a9f91b",
+ ),
+ ],
+ )
+
if __name__ == "__main__":
unittest.main()
More information about the llvm-commits
mailing list