[llvm] [Github] Escape `@` and html in the <details> block (PR #66118)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 01:19:51 PDT 2023


https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/66118:

>From 427f5444048e6cebef59592410329f80864503e8 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:48:47 +0200
Subject: [PATCH 1/6] [Github] Escape `@` and html in the <details> block

* This avoids pinging folks on all issues when they got pinged on
bugzilla eons ago

* Avoid formatting bugs when there is html in the issue description
---
 llvm/utils/git/github-automation.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 0517cac286a30f0..8e36c7af3cacb86 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,6 +12,7 @@
 from git import Repo  # type: ignore
 import html
 import github
+import html
 import os
 import re
 import requests
@@ -46,6 +47,9 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
             return team
     return None
 
+def escape_description(str):
+    # https://github.com/github/markup/issues/1168#issuecomment-494946168
+    return html.escape(str.replace("@", "@<!-- -->"), False)
 
 class IssueSubscriber:
     @property
@@ -67,12 +71,15 @@ def run(self) -> bool:
         if team.slug == "issue-subscribers-good-first-issue":
             comment = "{}\n".format(beginner_comment)
 
-        comment = (
-            f"@llvm/{team.slug}"
-            + "\n\n<details>\n"
-            + f"{self.issue.body}\n"
-            + "</details>"
-        )
+        body = escape_description(self.issue.body)
+
+        comment = ( f"""
+@llvm/{team.slug}
+
+<details>
+{body}
+</details>
+""" )
 
         self.issue.create_comment(comment)
         return True

>From 29a1cb55ea3a846ea7fb129e105f70fead0a7bae Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 19:57:49 +0200
Subject: [PATCH 2/6] Fix order of operations: we should escape html first so
 the comment does not get escaped

---
 llvm/utils/git/github-automation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 8e36c7af3cacb86..9fc09137078f11b 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -49,7 +49,8 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
-    return html.escape(str.replace("@", "@<!-- -->"), False)
+    str = html.escape(str, False)
+    return str.replace("@", "@<!-- -->")
 
 class IssueSubscriber:
     @property

>From bc85028c03a6669faeca66f70e03651791d90734 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Tue, 12 Sep 2023 20:05:41 +0200
Subject: [PATCH 3/6] Formatting

---
 llvm/utils/git/github-automation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 9fc09137078f11b..22d62dfa546af79 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -47,11 +47,13 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
             return team
     return None
 
+
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
     return str.replace("@", "@<!-- -->")
 
+
 class IssueSubscriber:
     @property
     def team_name(self) -> str:
@@ -74,13 +76,13 @@ def run(self) -> bool:
 
         body = escape_description(self.issue.body)
 
-        comment = ( f"""
+        comment = f"""
 @llvm/{team.slug}
 
 <details>
 {body}
 </details>
-""" )
+"""
 
         self.issue.create_comment(comment)
         return True

>From e8e769c0f08b013307b6bd9cd7d0b634f597a620 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:21:08 +0200
Subject: [PATCH 4/6] * Escape # * Escape PR description * Truncate the list of
 files if it's > 20K * Color the diff on github

---
 llvm/utils/git/github-automation.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 22d62dfa546af79..2a372622954b4bd 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -51,7 +51,7 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
-    return str.replace("@", "@<!-- -->")
+    return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
 
 
 class IssueSubscriber:
@@ -123,6 +123,11 @@ def run(self) -> bool:
             print(f"couldn't find team named {self.team_name}")
             return False
 
+         # GitHub limits comments to 65,536 characters, let's limit the diff
+         # and the file list to 20kB each.
+        STAT_LIMIT = 20 * 1024
+        DIFF_LIMIT = 20 * 1024
+
         # Get statistics for each file
         diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
         for file in self.pr.get_files():
@@ -133,37 +138,40 @@ def run(self) -> bool:
                 diff_stats += f"-{file.deletions}"
             diff_stats += ") "
             if file.status == "renamed":
-                print(f"(from {file.previous_filename})")
+                print(f"(from {file.previous_filename})"
             diff_stats += "\n"
-        diff_stats += "\n"
+            if len(diff_stats) > STAT_LIMIT)
+                break
 
         # Get the diff
         try:
             patch = html.escape(requests.get(self.pr.diff_url).text)
         except:
             patch = ""
-        diff_stats += "\n<pre>\n" + patch
 
         # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
-        DIFF_LIMIT = 20 * 1024
         patch_link = f"Full diff: {self.pr.diff_url}\n"
         if len(patch) > DIFF_LIMIT:
             patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
-            diff_stats = html.escape(diff_stats[0:DIFF_LIMIT]) + "...\n<truncated>\n"
-        diff_stats += "</pre>"
+            patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n<truncated>\n"
         team_mention = "@llvm/{}".format(team.slug)
 
-        body = self.pr.body
+        body = escape_description(self.pr.body)
         comment = f"""
 {self.COMMENT_TAG}
 {team_mention}
-            
+
 <details>
 <summary>Changes</summary>
 {body}
 --
 {patch_link}
+
 {diff_stats}
+
+<pre lang="diff">
+{patch}
+</pre>
 </details>
 """
 

>From 1ab5ca729c1aa20dc138aabb759ac759abf02550 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Wed, 13 Sep 2023 07:25:29 +0200
Subject: [PATCH 5/6] Formatting

---
 llvm/utils/git/github-automation.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 2a372622954b4bd..55a08a6eee17a63 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -12,7 +12,6 @@
 from git import Repo  # type: ignore
 import html
 import github
-import html
 import os
 import re
 import requests
@@ -123,8 +122,8 @@ def run(self) -> bool:
             print(f"couldn't find team named {self.team_name}")
             return False
 
-         # GitHub limits comments to 65,536 characters, let's limit the diff
-         # and the file list to 20kB each.
+        # GitHub limits comments to 65,536 characters, let's limit the diff
+        # and the file list to 20kB each.
         STAT_LIMIT = 20 * 1024
         DIFF_LIMIT = 20 * 1024
 
@@ -138,9 +137,9 @@ def run(self) -> bool:
                 diff_stats += f"-{file.deletions}"
             diff_stats += ") "
             if file.status == "renamed":
-                print(f"(from {file.previous_filename})"
+                print(f"(from {file.previous_filename})")
             diff_stats += "\n"
-            if len(diff_stats) > STAT_LIMIT)
+            if len(diff_stats) > STAT_LIMIT:
                 break
 
         # Get the diff

>From 916892cf5ceeac879f842cf0fd26780192a8e568 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 14 Sep 2023 10:18:46 +0200
Subject: [PATCH 6/6] Only escape # and @ when they could be part of an issue
 number/handle

---
 llvm/utils/git/github-automation.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 55a08a6eee17a63..11d21967cf210c0 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -50,7 +50,11 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
 def escape_description(str):
     # https://github.com/github/markup/issues/1168#issuecomment-494946168
     str = html.escape(str, False)
-    return str.replace("@", "@<!-- -->").replace("#", "#<!-- -->")
+    # '@' followed by alphanum is a user name
+    str = re.sub("@(?=\w+)","@<!-- -->", str)
+    # '#' followed by digits is considered an issue number
+    str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
+    return str
 
 
 class IssueSubscriber:



More information about the llvm-commits mailing list