[llvm] [Github] Improve formatting of PR diffs in bot notifications (PR #66118)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 23:46:09 PDT 2023


cor3ntin wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-github-workflow

<details>
<summary>Changes</summary>
* This avoids pinging folks on all issues when they got pinged on Bugzilla eons ago
* Avoid formatting bugs when there is html in the issue description
* Truncate the list of files and the diff independently of each other. This avoids cutting a file line in two or truncating in the middle of HTML markup. This is a fringe case, but it does happen when people accidentally push weird branches that conflict on all the files.


--
Full diff: https://github.com/llvm/llvm-project/pull/66118.diff


1 Files Affected:

- (modified) llvm/utils/git/github-automation.py (+45-15) 


```diff
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index eac5816b5499f6a..7f4ea46a0741249 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -47,6 +47,21 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
     return None
 
 
+def escape_description(str):
+    # https://github.com/github/markup/issues/1168#issuecomment-494946168
+    str = html.escape(str, False)
+    # '@' followed by alphanum is a user name
+    str = re.sub("@(?=\w+)", "@<!-- -->", str)
+    # '#' followed by digits is considered an issue number
+    str = re.sub("#(?=\d+\s)", "#<!-- -->", str)
+    return str
+
+
+def sanitize_markdown_code_block(str):
+    # remove codeblocks terminators
+    return re.sub("^\s*```\s*$", r"` ` `", str)
+
+
 class IssueSubscriber:
     @property
     def team_name(self) -> str:
@@ -67,12 +82,15 @@ def run(self) -> bool:
         if team.slug == "issue-subscribers-good-first-issue":
             comment = "{}\n".format(beginner_comment)
 
-        comment = (
-            f"@llvm/{team.slug}"
-            + "\n\n<details>\n"
-            + f"{self.issue.body}\n"
-            + "</details>"
-        )
+        body = escape_description(self.issue.body)
+
+        comment = f"""
+ at llvm/{team.slug}
+
+<details>
+{body}
+</details>
+"""
 
         self.issue.create_comment(comment)
         return True
@@ -113,6 +131,11 @@ def run(self) -> bool:
             print(f"couldn't find team named {self.team_name}")
             return False
 
+        # GitHub limits comments to 65,536 characters, let's limit the diff
+        # and the file list to 20kB each.
+        STAT_LIMIT = 20 * 1024
+        DIFF_LIMIT = 20 * 1024
+
         # Get statistics for each file
         diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
         for file in self.pr.get_files():
@@ -125,35 +148,42 @@ def run(self) -> bool:
             if file.status == "renamed":
                 print(f"(from {file.previous_filename})")
             diff_stats += "\n"
-        diff_stats += "\n"
+            if len(diff_stats) > STAT_LIMIT:
+                break
 
         # Get the diff
         try:
-            patch = html.escape(requests.get(self.pr.diff_url).text)
+            patch = requests.get(self.pr.diff_url).text
         except:
             patch = ""
-        diff_stats += "\n<pre>\n" + html.escape(patch)
 
-        # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
-        DIFF_LIMIT = 20 * 1024
+        patch = sanitize_markdown_code_block(patch)
+
         patch_link = f"Full diff: {self.pr.diff_url}\n"
         if len(patch) > DIFF_LIMIT:
             patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
-            diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n<truncated>\n"
-        diff_stats += "</pre>"
+            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
         team_mention = "@llvm/{}".format(team.slug)
 
-        body = self.pr.body
+        body = escape_description(self.pr.body)
+        # Note: the comment is in markdown and the code below
+        # is sensible to line break
         comment = f"""
 {self.COMMENT_TAG}
 {team_mention}
-            
+
 <details>
 <summary>Changes</summary>
 {body}
 --
 {patch_link}
+
 {diff_stats}
+
+```diff
+{patch}
+```
+
 </details>
 """
 

```

</details>


https://github.com/llvm/llvm-project/pull/66118


More information about the llvm-commits mailing list