[llvm] [Github] Improve formatting of PR diffs in bot notifications (PR #66118)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 23:57:49 PDT 2023
cor3ntin wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-github-workflow
<details>
<summary>Changes</summary>
* This avoids pinging folks on every issue just because they were pinged on Bugzilla eons ago
* Avoid formatting bugs when there is html in the issue description
* Truncate the list of files and the diff independently of each other. This avoids cutting a file line in two or cutting in the middle of HTML markup. This is a fringe case, but it does happen when people accidentally push weird branches that conflict on all the files.
Test: This is #<!-- -->66118 @<!-- -->cor3ntin
---
Full diff: https://github.com/llvm/llvm-project/pull/66118.diff
1 Files Affected:
- (modified) llvm/utils/git/github-automation.py (+47-16)
```diff
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eac5816b5499f6a..8578a83262717ab 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -47,6 +47,21 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
return None
+def escape_description(str):
+ # https://github.com/github/markup/issues/1168#issuecomment-494946168
+ str = html.escape(str, False)
+ # '@' followed by alphanum is a user name
+ str = re.sub("@(?=\w+)", "@<!-- -->", str)
+ # '#' followed by digits is considered an issue number
+ str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
+ return str
+
+
+def sanitize_markdown_code_block(str):
+ # remove codeblocks terminators
+ return re.sub("^\s*```\s*$", r"` ` `", str)
+
+
class IssueSubscriber:
@property
def team_name(self) -> str:
@@ -67,12 +82,15 @@ def run(self) -> bool:
if team.slug == "issue-subscribers-good-first-issue":
comment = "{}\n".format(beginner_comment)
- comment = (
- f"@llvm/{team.slug}"
- + "\n\n<details>\n"
- + f"{self.issue.body}\n"
- + "</details>"
- )
+ body = escape_description(self.issue.body)
+
+ comment = f"""
+@llvm/{team.slug}
+
+<details>
+{body}
+</details>
+"""
self.issue.create_comment(comment)
return True
@@ -113,6 +131,11 @@ def run(self) -> bool:
print(f"couldn't find team named {self.team_name}")
return False
+ # GitHub limits comments to 65,536 characters, let's limit the diff
+ # and the file list to 20kB each.
+ STAT_LIMIT = 20 * 1024
+ DIFF_LIMIT = 20 * 1024
+
# Get statistics for each file
diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
for file in self.pr.get_files():
@@ -125,35 +148,43 @@ def run(self) -> bool:
if file.status == "renamed":
print(f"(from {file.previous_filename})")
diff_stats += "\n"
- diff_stats += "\n"
+ if len(diff_stats) > STAT_LIMIT:
+ break
# Get the diff
try:
- patch = html.escape(requests.get(self.pr.diff_url).text)
+ patch = requests.get(self.pr.diff_url).text
except:
patch = ""
- diff_stats += "\n<pre>\n" + html.escape(patch)
- # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
- DIFF_LIMIT = 20 * 1024
+ patch = sanitize_markdown_code_block(patch)
+
patch_link = f"Full diff: {self.pr.diff_url}\n"
if len(patch) > DIFF_LIMIT:
patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
- diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n<truncated>\n"
- diff_stats += "</pre>"
+ patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
team_mention = "@llvm/{}".format(team.slug)
- body = self.pr.body
+ body = escape_description(self.pr.body)
+ # Note: the comment is in markdown and the code below
+ # is sensible to line break
comment = f"""
{self.COMMENT_TAG}
{team_mention}
-
+
<details>
<summary>Changes</summary>
+
{body}
---
+---
{patch_link}
+
{diff_stats}
+
+```diff
+{patch}
+```
+
</details>
"""
```
</details>
https://github.com/llvm/llvm-project/pull/66118
More information about the llvm-commits
mailing list