[llvm] [workflows] Split pr-code-format into two parts to make it more secure (PR #78216)

Fri Jan 19 22:37:35 PST 2024

https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/78216

>From 147c1d2693eddc58528b85a6f7c154fa796a103b Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Sat, 13 Jan 2024 16:16:30 -0800
Subject: [PATCH 1/2] [workflows] Split pr-code-format into two parts to make
 it more secure

Actions triggered by pull_request_target events have access to all
repository secrets, so it is unsafe to use them when executing
untrusted code.  The pr-code-format workflow does not execute
any untrusted code, but it passes untrused input into clang-format.
An attacker could use this to exploit a flaw in clang-format and
potentially gain access to the repository secrets.

By splitting the workflow, we can use the pull_request target which
is more secure and isolate the issue write permissions in a separate
job.  The pull_request target also makes it easier to test changes
to the code-format-helepr.py script, because the version of the
script from the pull request will be used rather than the
version of the script from main.

Fixes #77142
---
 .github/workflows/issue-write.yml    | 72 ++++++++++++++++++++++++++++
 .github/workflows/pr-code-format.yml | 30 +++++-------
 llvm/utils/git/code-format-helper.py | 27 +++++++++++
 3 files changed, 111 insertions(+), 18 deletions(-)
 create mode 100644 .github/workflows/issue-write.yml

diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml
new file mode 100644
index 00000000000000..acc625a3e02a3d
--- /dev/null
+++ b/.github/workflows/issue-write.yml
@@ -0,0 +1,72 @@
+name: Comment on an issue
+
+on:
+  workflow_run:
+    workflows: ["Check code formatting"]
+    types:
+      - completed
+
+permissions:
+  contents: read
+
+jobs:
+  pr-comment:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    if: >
+      github.event.workflow_run.event == 'pull_request'
+    steps:
+      - name: 'Download artifact'
+        # v7.0.1
+        uses: actions/github-script at 60a0d83039c74a4aee543508d2ffcb1c3799cdea
+        with:
+          script: |
+            let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               run_id: context.payload.workflow_run.id,
+            });
+            let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == "workflow-args"
+            })[0];
+            let download = await github.rest.actions.downloadArtifact({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               artifact_id: matchArtifact.id,
+               archive_format: 'zip',
+            });
+            let fs = require('fs');
+            fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/workflow-args.zip`, Buffer.from(download.data));
+
+      - run: unzip workflow-args.zip
+
+      - name: 'Comment on PR'
+        uses: actions/github-script at v3
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            var fs = require('fs');
+            const comments = JSON.parse(fs.readFileSync('./comments'));
+            if (!comments) {
+              return;
+            }
+            console.log(comments);
+            await comments.forEach(function (comment) {
+              if (comment.id) {
+                github.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: comment.number,
+                  comment_id: comment.id,
+                  body: comment.body
+                });
+              } else {
+                github.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: comment.number,
+                  body: comment.body
+                });
+              }
+            });
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 5223089ee8a93d..1475d872498d49 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -1,7 +1,5 @@
 name: "Check code formatting"
-on: pull_request_target
-permissions:
-  pull-requests: write
+on: pull_request
 
 jobs:
   code_formatter:
@@ -27,18 +25,6 @@ jobs:
           separator: ","
           skip_initial_fetch: true
 
-      # We need to make sure that we aren't executing/using any code from the
-      # PR for security reasons as we're using pull_request_target. Checkout
-      # the target branch with the necessary files.
-      - name: Fetch code formatting utils
-        uses: actions/checkout at v4
-        with:
-          sparse-checkout: |
-            llvm/utils/git/requirements_formatting.txt
-            llvm/utils/git/code-format-helper.py
-          sparse-checkout-cone-mode: false
-          path: code-format-tools
-
       - name: "Listed files"
         env:
           CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
@@ -56,10 +42,10 @@ jobs:
         with:
           python-version: '3.11'
           cache: 'pip'
-          cache-dependency-path: 'code-format-tools/llvm/utils/git/requirements_formatting.txt'
+          cache-dependency-path: 'llvm/utils/git/requirements_formatting.txt'
 
       - name: Install python dependencies
-        run: pip install -r code-format-tools/llvm/utils/git/requirements_formatting.txt
+        run: pip install -r llvm/utils/git/requirements_formatting.txt
 
       - name: Run code formatter
         env:
@@ -72,9 +58,17 @@ jobs:
         # explicitly in code-format-helper.py and not have to diff starting at
         # the merge base.
         run: |
-          python ./code-format-tools/llvm/utils/git/code-format-helper.py \
+          python ./llvm/utils/git/code-format-helper.py \
+            --write-comment-to-file \
             --token ${{ secrets.GITHUB_TOKEN }} \
             --issue-number $GITHUB_PR_NUMBER \
             --start-rev $(git merge-base $START_REV $END_REV) \
             --end-rev $END_REV \
             --changed-files "$CHANGED_FILES"
+
+      - uses: actions/upload-artifact at v2
+        if: always()
+        with:
+          name: workflow-args
+          path: |
+            comments
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 8a29a57d8d16bd..f96c9da586dfcc 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -44,6 +44,7 @@ class FormatArgs:
     token: str = None
     verbose: bool = True
     issue_number: int = 0
+    write_comment_to_file: bool = False
 
     def __init__(self, args: argparse.Namespace = None) -> None:
         if not args is None:
@@ -53,12 +54,14 @@ def __init__(self, args: argparse.Namespace = None) -> None:
             self.token = args.token
             self.changed_files = args.changed_files
             self.issue_number = args.issue_number
+            self.write_comment_to_file = args.write_comment_to_file
 
 
 class FormatHelper:
     COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
     name: str
     friendly_name: str
+    comment: dict = None
 
     @property
     def comment_tag(self) -> str:
@@ -119,6 +122,16 @@ def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> No
         comment_text = self.comment_tag + "\n\n" + comment_text
 
         existing_comment = self.find_comment(pr)
+
+        if args.write_comment_to_file:
+            self.comment = {
+                'number' : pr.number,
+                'body' : comment_text
+            }
+            if existing_comment:
+                self.comment['id'] =  existing_comment.id
+            return
+
         if existing_comment:
             existing_comment.edit(comment_text)
         elif create_new:
@@ -309,6 +322,8 @@ def hook_main():
         if fmt.has_tool():
             if not fmt.run(args.changed_files, args):
                 failed_fmts.append(fmt.name)
+            if fmt.comment:
+              comments.append(fmt.comment)
         else:
             print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
 
@@ -349,6 +364,10 @@ def hook_main():
         type=str,
         help="Comma separated list of files that has been changed",
     )
+    parser.add_argument(
+        "--write-comment-to-file",
+        action='store_true',
+        help="Don't create a comments on the PR, instead write the comments and metadata a file called 'comment'"   )
 
     args = FormatArgs(parser.parse_args())
 
@@ -357,9 +376,17 @@ def hook_main():
         changed_files = args.changed_files.split(",")
 
     failed_formatters = []
+    comments = []
     for fmt in ALL_FORMATTERS:
         if not fmt.run(changed_files, args):
             failed_formatters.append(fmt.name)
+        if fmt.comment:
+            comments.append(fmt.comment)
+    
+    if len(comments):
+        with open('comments', 'w') as f:
+            import json
+            json.dump(comments, f)
 
     if len(failed_formatters) > 0:
         print(f"error: some formatters failed: {' '.join(failed_formatters)}")

>From cd37e90776ff2f9bcab162dedecbd7549ef83689 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Fri, 19 Jan 2024 22:26:05 -0800
Subject: [PATCH 2/2] Dump comments file to make debugging easier

---
 .github/workflows/issue-write.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml
index acc625a3e02a3d..b3bfd12058d529 100644
--- a/.github/workflows/issue-write.yml
+++ b/.github/workflows/issue-write.yml
@@ -70,3 +70,7 @@ jobs:
                 });
               }
             });
+
+      - name: Dump comments file
+        if: always()
+        run: cat comments