[llvm] [Github] Improve formating of PR diffs in bot notifications (PR #66118)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 12:14:36 PDT 2023


https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/66118:

>From 72be28c61643044888c76b74b7b7e5d15b386ede Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:48:47 +0200
Subject: [PATCH 1/8] [Github] Escape `@` and html in the <details> block

* This avoid pinging folks on all issue when they got pinged on
bugzilla eaons ago

* Avoid formating bugs when there is html in the issue description
---
 llvm/utils/git/github-automation.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eac5816b5499f6a..02e47794126584a 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,6 +12,7 @@
 from git import Repo  # type: ignore
 import html
 import github
+import html
 import os
 import re
 import requests
@@ -46,6 +47,9 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
             return team
     return None
 
+def escape_description(str):
+    # https://github.com/github/markup/issues/1168#issuecomment-494946168
+    return html.escape(str.replace("@", "@<!-- -->"), False)
 
 class IssueSubscriber:
     @property
@@ -67,12 +71,15 @@ def run(self) -> bool:
         if team.slug == "issue-subscribers-good-first-issue":
             comment = "{}\n".format(beginner_comment)
 
-        comment = (
-            f"@llvm/{team.slug}"
-            + "\n\n<details>\n"
-            + f"{self.issue.body}\n"
-            + "</details>"
-        )
+        body = escape_description(self.issue.body)
+
+        comment = ( f"""
+ at llvm/{team.slug}
+
+<details>
+{body}
+</details>
+""" )
 
         self.issue.create_comment(comment)
         return True

>From 42bb7d5940a1dfb6e385838687c770bc37c6512a Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:57:49 +0200
Subject: [PATCH 2/8] Fix order of operations: we should escape html first so
 the comment does not get escaped

---
 llvm/utils/git/github-automation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 02e47794126584a..c1885ea4af8015e 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -49,7 +49,8 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
-    return html.escape(str.replace("@", "@<!-- -->"), False)
+    str = html.escape(str, False)
+    return str.replace("@", "@<!-- -->")
 
 class IssueSubscriber:
     @property

>From b4b4e8f48390212d5bcc296f121050db4c3fb3be Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 20:05:41 +0200
Subject: [PATCH 3/8] Formatting

---
 llvm/utils/git/github-automation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index c1885ea4af8015e..eeba5eaff97fb34 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -47,11 +47,13 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
             return team
     return None
 
+
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
     return str.replace("@", "@<!-- -->")
 
+
 class IssueSubscriber:
     @property
     def team_name(self) -> str:
@@ -74,13 +76,13 @@ def run(self) -> bool:
 
         body = escape_description(self.issue.body)
 
-        comment = ( f"""
+        comment = f"""
 @llvm/{team.slug}
 
 <details>
 {body}
 </details>
-""" )
+"""
 
         self.issue.create_comment(comment)
         return True

>From d12147f1587e1f8ac29b31e159eb4e5efaefb810 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:21:08 +0200
Subject: [PATCH 4/8] * Escape # * Escape PR description * Trunkate the list of
 files if it's > 20K * Color the diff on github

---
 llvm/utils/git/github-automation.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eeba5eaff97fb34..2a372622954b4bd 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -51,7 +51,7 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
-    return str.replace("@", "@<!-- -->")
+    return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
 
 
 class IssueSubscriber:
@@ -123,6 +123,11 @@ def run(self) -> bool:
             print(f"couldn't find team named {self.team_name}")
             return False
 
+         # GitHub limits comments to 65,536 characters, let's limit the diff
+         # and the file list to 20kB each.
+        STAT_LIMIT = 20 * 1024
+        DIFF_LIMIT = 20 * 1024
+
         # Get statistics for each file
         diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
         for file in self.pr.get_files():
@@ -133,37 +138,40 @@ def run(self) -> bool:
                 diff_stats += f"-{file.deletions}"
             diff_stats += ") "
             if file.status == "renamed":
-                print(f"(from {file.previous_filename})")
+                print(f"(from {file.previous_filename})"
             diff_stats += "\n"
-        diff_stats += "\n"
+            if len(diff_stats) > STAT_LIMIT)
+                break
 
         # Get the diff
         try:
             patch = html.escape(requests.get(self.pr.diff_url).text)
         except:
             patch = ""
-        diff_stats += "\n<pre>\n" + html.escape(patch)
 
         # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
-        DIFF_LIMIT = 20 * 1024
         patch_link = f"Full diff: {self.pr.diff_url}\n"
         if len(patch) > DIFF_LIMIT:
             patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
-            diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n<truncated>\n"
-        diff_stats += "</pre>"
+            patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n<truncated>\n"
         team_mention = "@llvm/{}".format(team.slug)
 
-        body = self.pr.body
+        body = escape_description(self.pr.body)
         comment = f"""
 {self.COMMENT_TAG}
 {team_mention}
-            
+
 <details>
 <summary>Changes</summary>
 {body}
 --
 {patch_link}
+
 {diff_stats}
+
+<pre lang="diff">
+{patch}
+</pre>
 </details>
 """
 

>From b6456169661b49556dbc6b73baed1d64561bbf56 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:25:29 +0200
Subject: [PATCH 5/8] Formatting

---
 llvm/utils/git/github-automation.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 2a372622954b4bd..55a08a6eee17a63 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,7 +12,6 @@
 from git import Repo  # type: ignore
 import html
 import github
-import html
 import os
 import re
 import requests
@@ -123,8 +122,8 @@ def run(self) -> bool:
             print(f"couldn't find team named {self.team_name}")
             return False
 
-         # GitHub limits comments to 65,536 characters, let's limit the diff
-         # and the file list to 20kB each.
+        # GitHub limits comments to 65,536 characters, let's limit the diff
+        # and the file list to 20kB each.
         STAT_LIMIT = 20 * 1024
         DIFF_LIMIT = 20 * 1024
 
@@ -138,9 +137,9 @@ def run(self) -> bool:
                 diff_stats += f"-{file.deletions}"
             diff_stats += ") "
             if file.status == "renamed":
-                print(f"(from {file.previous_filename})"
+                print(f"(from {file.previous_filename})")
             diff_stats += "\n"
-            if len(diff_stats) > STAT_LIMIT)
+            if len(diff_stats) > STAT_LIMIT:
                 break
 
         # Get the diff

>From 8c0b3bc07d236816f1a11f280f2c56e36adf48c9 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 10:18:46 +0200
Subject: [PATCH 6/8] Only escape # and @ when they could be part of an issue
 number/handle

---
 llvm/utils/git/github-automation.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 55a08a6eee17a63..11d21967cf210c0 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -50,7 +50,11 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
-    return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
+    # '@' followed by alphanum is a user name
+    str = re.sub("@(?=\w+)","@<!-- -->", str)
+    # '#' followed by digits is considered an issue number
+    str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
+    return str
 
 
 class IssueSubscriber:

>From 47ad7a04f072c4c0a8822bb8a074369a13f4a15d Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 21:00:05 +0200
Subject: [PATCH 7/8] Use a markdown codeblock for the diff.

This way we can render < and & properly.

This only work if there is a new line before the 3 ticks.
---
 llvm/utils/git/github-automation.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 11d21967cf210c0..9de02a9a71da787 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -56,6 +56,9 @@ def escape_description(str):
     str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
     return str
 
+def sanitize_markdown_code_block(str):
+    # remove codeblocks terminators
+    return re.sub("^\s*```\s*$", r"` ` `", str)
 
 class IssueSubscriber:
     @property
@@ -148,18 +151,21 @@ def run(self) -> bool:
 
         # Get the diff
         try:
-            patch = html.escape(requests.get(self.pr.diff_url).text)
+            patch = requests.get(self.pr.diff_url).text
         except:
             patch = ""
 
-        # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
+        patch = sanitize_markdown_code_block(patch)
+
         patch_link = f"Full diff: {self.pr.diff_url}\n"
         if len(patch) > DIFF_LIMIT:
             patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
-            patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n<truncated>\n"
+            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
         team_mention = "@llvm/{}".format(team.slug)
 
         body = escape_description(self.pr.body)
+# Note: the comment is in markdown and the code below
+# is sensible to line break
         comment = f"""
 {self.COMMENT_TAG}
 {team_mention}
@@ -172,9 +178,10 @@ def run(self) -> bool:
 
 {diff_stats}
 
-<pre lang="diff">
+```diff
 {patch}
-</pre>
+```
+
 </details>
 """
 

>From 1c7b435c7960b31a2cfbc6d074619ebbb593bf33 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 21:14:20 +0200
Subject: [PATCH 8/8] formatting

---
 llvm/utils/git/github-automation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 9de02a9a71da787..d5cf511ebeecbef 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -56,6 +56,7 @@ def escape_description(str):
     str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
     return str
 
+
 def sanitize_markdown_code_block(str):
     # remove codeblocks terminators
     return re.sub("^\s*```\s*$", r"` ` `", str)
@@ -164,8 +165,8 @@ def run(self) -> bool:
         team_mention = "@llvm/{}".format(team.slug)
 
         body = escape_description(self.pr.body)
-# Note: the comment is in markdown and the code below
-# is sensible to line break
+        # Note: the comment is in markdown and the code below
+        # is sensible to line break
         comment = f"""
 {self.COMMENT_TAG}
 {team_mention}



More information about the llvm-commits mailing list