[llvm] [Github] Improve formating of PR diffs in bot notifications (PR #66118)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 12:14:36 PDT 2023
https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/66118:
>From 72be28c61643044888c76b74b7b7e5d15b386ede Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:48:47 +0200
Subject: [PATCH 1/8] [Github] Escape `@` and html in the <details> block
* This avoid pinging folks on all issue when they got pinged on
bugzilla eaons ago
* Avoid formating bugs when there is html in the issue description
---
llvm/utils/git/github-automation.py | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eac5816b5499f6a..02e47794126584a 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,6 +12,7 @@
from git import Repo # type: ignore
import html
import github
+import html
import os
import re
import requests
@@ -46,6 +47,9 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
return team
return None
+def escape_description(str):
+ # https://github.com/github/markup/issues/1168#issuecomment-494946168
+ return html.escape(str.replace("@", "@<!-- -->"), False)
class IssueSubscriber:
@property
@@ -67,12 +71,15 @@ def run(self) -> bool:
if team.slug == "issue-subscribers-good-first-issue":
comment = "{}\n".format(beginner_comment)
- comment = (
- f"@llvm/{team.slug}"
- + "\n\n<details>\n"
- + f"{self.issue.body}\n"
- + "</details>"
- )
+ body = escape_description(self.issue.body)
+
+ comment = ( f"""
+ at llvm/{team.slug}
+
+<details>
+{body}
+</details>
+""" )
self.issue.create_comment(comment)
return True
>From 42bb7d5940a1dfb6e385838687c770bc37c6512a Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:57:49 +0200
Subject: [PATCH 2/8] Fix order of operations: we should escape html first so
the comment does not get escaped
---
llvm/utils/git/github-automation.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 02e47794126584a..c1885ea4af8015e 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -49,7 +49,8 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
def escape_description(str):
# https://github.com/github/markup/issues/1168#issuecomment-494946168
- return html.escape(str.replace("@", "@<!-- -->"), False)
+ str = html.escape(str, False)
+ return str.replace("@", "@<!-- -->")
class IssueSubscriber:
@property
>From b4b4e8f48390212d5bcc296f121050db4c3fb3be Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 20:05:41 +0200
Subject: [PATCH 3/8] Formatting
---
llvm/utils/git/github-automation.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index c1885ea4af8015e..eeba5eaff97fb34 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -47,11 +47,13 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
return team
return None
+
def escape_description(str):
# https://github.com/github/markup/issues/1168#issuecomment-494946168
str = html.escape(str, False)
return str.replace("@", "@<!-- -->")
+
class IssueSubscriber:
@property
def team_name(self) -> str:
@@ -74,13 +76,13 @@ def run(self) -> bool:
body = escape_description(self.issue.body)
- comment = ( f"""
+ comment = f"""
@llvm/{team.slug}
<details>
{body}
</details>
-""" )
+"""
self.issue.create_comment(comment)
return True
>From d12147f1587e1f8ac29b31e159eb4e5efaefb810 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:21:08 +0200
Subject: [PATCH 4/8] * Escape # * Escape PR description * Trunkate the list of
files if it's > 20K * Color the diff on github
---
llvm/utils/git/github-automation.py | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eeba5eaff97fb34..2a372622954b4bd 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -51,7 +51,7 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
def escape_description(str):
# https://github.com/github/markup/issues/1168#issuecomment-494946168
str = html.escape(str, False)
- return str.replace("@", "@<!-- -->")
+ return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
class IssueSubscriber:
@@ -123,6 +123,11 @@ def run(self) -> bool:
print(f"couldn't find team named {self.team_name}")
return False
+ # GitHub limits comments to 65,536 characters, let's limit the diff
+ # and the file list to 20kB each.
+ STAT_LIMIT = 20 * 1024
+ DIFF_LIMIT = 20 * 1024
+
# Get statistics for each file
diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
for file in self.pr.get_files():
@@ -133,37 +138,40 @@ def run(self) -> bool:
diff_stats += f"-{file.deletions}"
diff_stats += ") "
if file.status == "renamed":
- print(f"(from {file.previous_filename})")
+ print(f"(from {file.previous_filename})"
diff_stats += "\n"
- diff_stats += "\n"
+ if len(diff_stats) > STAT_LIMIT)
+ break
# Get the diff
try:
patch = html.escape(requests.get(self.pr.diff_url).text)
except:
patch = ""
- diff_stats += "\n<pre>\n" + html.escape(patch)
# GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
- DIFF_LIMIT = 20 * 1024
patch_link = f"Full diff: {self.pr.diff_url}\n"
if len(patch) > DIFF_LIMIT:
patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
- diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n<truncated>\n"
- diff_stats += "</pre>"
+ patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n<truncated>\n"
team_mention = "@llvm/{}".format(team.slug)
- body = self.pr.body
+ body = escape_description(self.pr.body)
comment = f"""
{self.COMMENT_TAG}
{team_mention}
-
+
<details>
<summary>Changes</summary>
{body}
--
{patch_link}
+
{diff_stats}
+
+<pre lang="diff">
+{patch}
+</pre>
</details>
"""
>From b6456169661b49556dbc6b73baed1d64561bbf56 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:25:29 +0200
Subject: [PATCH 5/8] Formatting
---
llvm/utils/git/github-automation.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 2a372622954b4bd..55a08a6eee17a63 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,7 +12,6 @@
from git import Repo # type: ignore
import html
import github
-import html
import os
import re
import requests
@@ -123,8 +122,8 @@ def run(self) -> bool:
print(f"couldn't find team named {self.team_name}")
return False
- # GitHub limits comments to 65,536 characters, let's limit the diff
- # and the file list to 20kB each.
+ # GitHub limits comments to 65,536 characters, let's limit the diff
+ # and the file list to 20kB each.
STAT_LIMIT = 20 * 1024
DIFF_LIMIT = 20 * 1024
@@ -138,9 +137,9 @@ def run(self) -> bool:
diff_stats += f"-{file.deletions}"
diff_stats += ") "
if file.status == "renamed":
- print(f"(from {file.previous_filename})"
+ print(f"(from {file.previous_filename})")
diff_stats += "\n"
- if len(diff_stats) > STAT_LIMIT)
+ if len(diff_stats) > STAT_LIMIT:
break
# Get the diff
>From 8c0b3bc07d236816f1a11f280f2c56e36adf48c9 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 10:18:46 +0200
Subject: [PATCH 6/8] Only escape # and @ when they could be part of an issue
number/handle
---
llvm/utils/git/github-automation.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 55a08a6eee17a63..11d21967cf210c0 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -50,7 +50,11 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
def escape_description(str):
# https://github.com/github/markup/issues/1168#issuecomment-494946168
str = html.escape(str, False)
- return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
+ # '@' followed by alphanum is a user name
+ str = re.sub("@(?=\w+)","@<!-- -->", str)
+ # '#' followed by digits is considered an issue number
+ str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
+ return str
class IssueSubscriber:
>From 47ad7a04f072c4c0a8822bb8a074369a13f4a15d Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 21:00:05 +0200
Subject: [PATCH 7/8] Use a markdown codeblock for the diff.
This way we can render < and & properly.
This only work if there is a new line before the 3 ticks.
---
llvm/utils/git/github-automation.py | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 11d21967cf210c0..9de02a9a71da787 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -56,6 +56,9 @@ def escape_description(str):
str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
return str
+def sanitize_markdown_code_block(str):
+ # remove codeblocks terminators
+ return re.sub("^\s*```\s*$", r"` ` `", str)
class IssueSubscriber:
@property
@@ -148,18 +151,21 @@ def run(self) -> bool:
# Get the diff
try:
- patch = html.escape(requests.get(self.pr.diff_url).text)
+ patch = requests.get(self.pr.diff_url).text
except:
patch = ""
- # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
+ patch = sanitize_markdown_code_block(patch)
+
patch_link = f"Full diff: {self.pr.diff_url}\n"
if len(patch) > DIFF_LIMIT:
patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
- patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n<truncated>\n"
+ patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
team_mention = "@llvm/{}".format(team.slug)
body = escape_description(self.pr.body)
+# Note: the comment is in markdown and the code below
+# is sensible to line break
comment = f"""
{self.COMMENT_TAG}
{team_mention}
@@ -172,9 +178,10 @@ def run(self) -> bool:
{diff_stats}
-<pre lang="diff">
+```diff
{patch}
-</pre>
+```
+
</details>
"""
>From 1c7b435c7960b31a2cfbc6d074619ebbb593bf33 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 21:14:20 +0200
Subject: [PATCH 8/8] formatting
---
llvm/utils/git/github-automation.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 9de02a9a71da787..d5cf511ebeecbef 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -56,6 +56,7 @@ def escape_description(str):
str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
return str
+
def sanitize_markdown_code_block(str):
# remove codeblocks terminators
return re.sub("^\s*```\s*$", r"` ` `", str)
@@ -164,8 +165,8 @@ def run(self) -> bool:
team_mention = "@llvm/{}".format(team.slug)
body = escape_description(self.pr.body)
-# Note: the comment is in markdown and the code below
-# is sensible to line break
+ # Note: the comment is in markdown and the code below
+ # is sensible to line break
comment = f"""
{self.COMMENT_TAG}
{team_mention}
More information about the llvm-commits
mailing list