[llvm] [CI] Make premerge_advisor_explain write comments (PR #166605)

Fri Nov 7 11:23:33 PST 2025

https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/166605

>From 0dc46fc31d671960c39652d3741525749ed88d55 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Wed, 5 Nov 2025 18:12:26 +0000
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7

[skip ci]
---
 .ci/generate_test_report_github.py   |  15 +---
 .ci/generate_test_report_lib.py      |  71 +++++++++++++------
 .ci/generate_test_report_lib_test.py | 101 +++++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 32 deletions(-)

diff --git a/.ci/generate_test_report_github.py b/.ci/generate_test_report_github.py
index 08387de817467..18c5e078a5064 100644
--- a/.ci/generate_test_report_github.py
+++ b/.ci/generate_test_report_github.py
@@ -4,21 +4,10 @@
 """Script to generate a build report for Github."""
 
 import argparse
-import platform
 
 import generate_test_report_lib
 
 
-def compute_platform_title() -> str:
-    logo = ":window:" if platform.system() == "Windows" else ":penguin:"
-    # On Linux the machine value is x86_64 on Windows it is AMD64.
-    if platform.machine() == "x86_64" or platform.machine() == "AMD64":
-        arch = "x64"
-    else:
-        arch = platform.machine()
-    return f"{logo} {platform.system()} {arch} Test Results"
-
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("return_code", help="The build's return code.", type=int)
@@ -28,7 +17,9 @@ def compute_platform_title() -> str:
     args = parser.parse_args()
 
     report = generate_test_report_lib.generate_report_from_files(
-        compute_platform_title(), args.return_code, args.build_test_logs
+        generate_test_report_lib.compute_platform_title(),
+        args.return_code,
+        args.build_test_logs,
     )
 
     print(report)
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py
index 0c025c561f6f7..48a6be903da41 100644
--- a/.ci/generate_test_report_lib.py
+++ b/.ci/generate_test_report_lib.py
@@ -3,8 +3,20 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 """Library to parse JUnit XML files and return a markdown report."""
 
+from typing import TypedDict
+import platform
+
 from junitparser import JUnitXml, Failure
 
+
+# This data structure should match the definition in llvm-zorg in
+# premerge/advisor/advisor_lib.py
+class FailureExplanation(TypedDict):
+    name: str
+    explained: bool
+    reason: str | None
+
+
 SEE_BUILD_FILE_STR = "Download the build's log file to see the details."
 UNRELATED_FAILURES_STR = (
     "If these failures are unrelated to your changes (for example "
@@ -82,16 +94,29 @@ def find_failure_in_ninja_logs(ninja_logs: list[list[str]]) -> list[tuple[str, s
     return failures
 
 
-def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]:
-    """Formats ninja failures into summary views for the report."""
+def _format_failures(
+    failures: list[tuple[str, str]], failure_explanations: dict[str, FailureExplanation]
+) -> list[str]:
+    """Formats failures into summary views for the report."""
     output = []
-    for build_failure in ninja_failures:
+    for build_failure in failures:
         failed_action, failure_message = build_failure
+        failure_explanation = None
+        if failed_action in failure_explanations:
+            failure_explanation = failure_explanations[failed_action]
+        output.append("<details>")
+        if failure_explanation:
+            output.extend(
+                [
+                    f"<summary>{failed_action} (Likely Already Failing)</summary>" "",
+                    failure_explanation["reason"],
+                    "",
+                ]
+            )
+        else:
+            output.extend([f"<summary>{failed_action}</summary>", ""])
         output.extend(
             [
-                "<details>",
-                f"<summary>{failed_action}</summary>",
-                "",
                 "```",
                 failure_message,
                 "```",
@@ -132,12 +157,19 @@ def generate_report(
     ninja_logs: list[list[str]],
     size_limit=1024 * 1024,
     list_failures=True,
+    failure_explanations_list: list[FailureExplanation] = [],
 ):
     failures = get_failures(junit_objects)
     tests_run = 0
     tests_skipped = 0
     tests_failed = 0
 
+    failure_explanations: dict[str, FailureExplanation] = {}
+    for failure_explanation in failure_explanations_list:
+        if not failure_explanation["explained"]:
+            continue
+        failure_explanations[failure_explanation["name"]] = failure_explanation
+
     for results in junit_objects:
         for testsuite in results:
             tests_run += testsuite.tests
@@ -176,7 +208,7 @@ def generate_report(
                         "",
                     ]
                 )
-                report.extend(_format_ninja_failures(ninja_failures))
+                report.extend(_format_failures(ninja_failures, failure_explanations))
                 report.extend(
                     [
                         "",
@@ -212,18 +244,7 @@ def plural(num_tests):
 
         for testsuite_name, failures in failures.items():
             report.extend(["", f"### {testsuite_name}"])
-            for name, output in failures:
-                report.extend(
-                    [
-                        "<details>",
-                        f"<summary>{name}</summary>",
-                        "",
-                        "```",
-                        output,
-                        "```",
-                        "</details>",
-                    ]
-                )
+            report.extend(_format_failures(failures, failure_explanations))
     elif return_code != 0:
         # No tests failed but the build was in a failed state. Bring this to the user's
         # attention.
@@ -248,7 +269,7 @@ def plural(num_tests):
                     "",
                 ]
             )
-            report.extend(_format_ninja_failures(ninja_failures))
+            report.extend(_format_failures(ninja_failures, failure_explanations))
 
     if failures or return_code != 0:
         report.extend(["", UNRELATED_FAILURES_STR])
@@ -285,3 +306,13 @@ def load_info_from_files(build_log_files):
 def generate_report_from_files(title, return_code, build_log_files):
     junit_objects, ninja_logs = load_info_from_files(build_log_files)
     return generate_report(title, return_code, junit_objects, ninja_logs)
+
+
+def compute_platform_title() -> str:
+    logo = ":window:" if platform.system() == "Windows" else ":penguin:"
+    # On Linux the machine value is x86_64 on Windows it is AMD64.
+    if platform.machine() == "x86_64" or platform.machine() == "AMD64":
+        arch = "x64"
+    else:
+        arch = platform.machine()
+    return f"{logo} {platform.system()} {arch} Test Results"
diff --git a/.ci/generate_test_report_lib_test.py b/.ci/generate_test_report_lib_test.py
index 4068a3b7300a4..db966a84e09f2 100644
--- a/.ci/generate_test_report_lib_test.py
+++ b/.ci/generate_test_report_lib_test.py
@@ -781,6 +781,107 @@ def test_report_size_limit(self):
             ),
         )
 
+    def test_report_ninja_explanation(self):
+        self.assertEqual(
+            generate_test_report_lib.generate_report(
+                "Foo",
+                1,
+                [],
+                [
+                    [
+                        "[1/5] test/1.stamp",
+                        "[2/5] test/2.stamp",
+                        "[3/5] test/3.stamp",
+                        "[4/5] test/4.stamp",
+                        "FAILED: test/4.stamp",
+                        "touch test/4.stamp",
+                        "Half Moon Bay.",
+                        "[5/5] test/5.stamp",
+                    ]
+                ],
+                failure_explanations_list=[
+                    {
+                        "name": "test/4.stamp",
+                        "explained": True,
+                        "reason": "Failing at head",
+                    }
+                ],
+            ),
+            dedent(
+                """\
+            # Foo
+
+            The build failed before running any tests. Click on a failure below to see the details.
+
+            <details>
+            <summary>test/4.stamp (Likely Already Failing)</summary>
+            Failing at head
+
+            ```
+            FAILED: test/4.stamp
+            touch test/4.stamp
+            Half Moon Bay.
+            ```
+            </details>
+            
+            If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the `infrastructure` label."""
+            ),
+        )
+
+    def test_report_test_failure_explanation(self):
+        self.assertEqual(
+            generate_test_report_lib.generate_report(
+                "Foo",
+                1,
+                [
+                    junit_from_xml(
+                        dedent(
+                            """\
+          <?xml version="1.0" encoding="UTF-8"?>
+          <testsuites time="8.89">
+          <testsuite name="Bar" tests="1" failures="1" skipped="0" time="410.63">
+          <testcase classname="Bar/test_3" name="test_3" time="0.02">
+            <failure><![CDATA[Error! Expected Big Sur to be next to the ocean.]]></failure>
+          </testcase>
+          </testsuite>
+          </testsuites>"""
+                        )
+                    )
+                ],
+                [],
+                failure_explanations_list=[
+                    {
+                        "name": "Bar/test_3/test_3",
+                        "explained": True,
+                        "reason": "Big Sur is next to the Pacific.",
+                    }
+                ],
+            ),
+            (
+                dedent(
+                    """\
+          # Foo
+
+          * 1 test failed
+
+          ## Failed Tests
+          (click on a test name to see its output)
+
+          ### Bar
+          <details>
+          <summary>Bar/test_3/test_3 (Likely Already Failing)</summary>
+          Big Sur is next to the Pacific.
+          
+          ```
+          Error! Expected Big Sur to be next to the ocean.
+          ```
+          </details>
+
+          If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the `infrastructure` label."""
+                )
+            ),
+        )
+
     def test_generate_report_end_to_end(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             junit_xml_file = os.path.join(temp_dir, "junit.xml")

>From 06c030dcb4ee57be287beffd96d1b21ef1697dd4 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Wed, 5 Nov 2025 18:23:46 +0000
Subject: [PATCH 2/4] fix

Created using spr 1.3.7
---
 .ci/premerge_advisor_explain.py | 34 ++++++++++++++++-----------------
 .ci/utils.sh                    | 10 +++++-----
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py
index 4d840a33c3cf2..1d487af9e9ec7 100644
--- a/.ci/premerge_advisor_explain.py
+++ b/.ci/premerge_advisor_explain.py
@@ -31,22 +31,11 @@ def get_comment_id(platform: str, pr: github.PullRequest.PullRequest) -> int | N
 def get_comment(
     github_token: str,
     pr_number: int,
-    junit_objects,
-    ninja_logs,
-    advisor_response,
-    return_code,
+    body: str,
 ) -> dict[str, str]:
     repo = github.Github(github_token).get_repo("llvm/llvm-project")
     pr = repo.get_issue(pr_number).as_pull_request()
-    comment = {
-        "body": generate_test_report_lib.generate_report(
-            generate_test_report_lib.compute_platform_title(),
-            return_code,
-            junit_objects,
-            ninja_logs,
-            failure_explanations_list=advisor_response,
-        )
-    }
+    comment = {"body": body}
     comment_id = get_comment_id(platform.system(), pr)
     if comment_id:
         comment["id"] = comment_id
@@ -59,6 +48,14 @@ def main(
     pr_number: int,
     return_code: int,
 ):
+    if return_code == 0:
+        with open("comment", "w") as comment_file_handle:
+            comment = get_comment(
+                ":white_check_mark: With the latest revision this PR passed "
+                "the premerge checks."
+            )
+            if comment["id"]:
+                json.dump([comment], comment_file_handle)
     junit_objects, ninja_logs = generate_test_report_lib.load_info_from_files(
         build_log_files
     )
@@ -90,10 +87,13 @@ def main(
             get_comment(
                 github_token,
                 pr_number,
-                junit_objects,
-                ninja_logs,
-                advisor_response.json(),
-                return_code,
+                generate_test_report_lib.generate_report(
+                    generate_test_report_lib.compute_platform_title(),
+                    return_code,
+                    junit_objects,
+                    ninja_logs,
+                    failure_explanations_list=advisor_response.json(),
+                ),
             )
         ]
         with open("comment", "w") as comment_file_handle:
diff --git a/.ci/utils.sh b/.ci/utils.sh
index 72f4b04f5bf3a..91c27319f3534 100644
--- a/.ci/utils.sh
+++ b/.ci/utils.sh
@@ -33,18 +33,18 @@ function at-exit {
   # If building fails there will be no results files.
   shopt -s nullglob
 
-  if [[ "$GITHUB_STEP_SUMMARY" != "" ]]; then
+  if [[ "$GITHUB_ACTIONS" != "" ]]; then
     python "${MONOREPO_ROOT}"/.ci/generate_test_report_github.py \
       $retcode "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log \
       >> $GITHUB_STEP_SUMMARY
+    python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \
+      $(git rev-parse HEAD~1) $retcode ${{ secrets.GITHUB_TOKEN }} \
+      $GITHUB_PR_NUMBER "${BUILD_DIR}"/test-results.*.xml \
+      "${MONOREPO_ROOT}"/ninja*.log
   fi
 
   if [[ "$retcode" != "0" ]]; then
     if [[ "$GITHUB_ACTIONS" != "" ]]; then
-      python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \
-        $(git rev-parse HEAD~1) $retcode ${{ secrets.GITHUB_TOKEN }} \
-        $GITHUB_PR_NUMBER "${BUILD_DIR}"/test-results.*.xml \
-        "${MONOREPO_ROOT}"/ninja*.log
       python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
         $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \
         "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log

>From 7e44989fceaeec33405c5368e16d999f5701a7b2 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Thu, 6 Nov 2025 16:57:02 +0000
Subject: [PATCH 3/4] docs

Created using spr 1.3.7
---
 .ci/premerge_advisor_explain.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py
index 1d487af9e9ec7..08ccfb3d0e3d4 100644
--- a/.ci/premerge_advisor_explain.py
+++ b/.ci/premerge_advisor_explain.py
@@ -48,6 +48,31 @@ def main(
     pr_number: int,
     return_code: int,
 ):
+    """The main entrypoint for the script.
+
+    This function parses failures from files, requests information from the
+    premerge advisor, and may write a Github comment depending upon the output.
+    There are four different scenarios:
+    1. There has never been a previous failure and the job passes - We do not
+       create a comment. We write out an empty file to the comment path so the
+       issue-write workflow knows not to create anything.
+    2. There has never been a previous failure and the job fails - We create a
+       new comment containing the failure information and any possible premerge
+       advisor findings.
+    3. There has been a previous failure and the job passes - We update the
+       existing comment by passing its ID anda passed message to the
+       issue-write workflow.
+    4. There has been a previous failure and the job fails - We update the
+       existing comment in the same manner as above, but generate the comment
+       as if we have a failure.
+
+    Args:
+      commit_sha: The base commit SHA for this PR run.
+      build_log_files: The list of JUnit XML files and ninja logs.
+      github_token: The token to use to access the Github API.
+      pr_number: The number of the PR associated with this run.
+      return_code: The numerical return code of ninja/CMake.
+    """
     if return_code == 0:
         with open("comment", "w") as comment_file_handle:
             comment = get_comment(

>From 54d16ebf1d740947542ca667e9e43ab703961cc2 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 7 Nov 2025 19:23:21 +0000
Subject: [PATCH 4/4] feedback

Created using spr 1.3.7
---
 .ci/premerge_advisor_explain.py | 2 +-
 .ci/utils.sh                    | 2 +-
 .github/workflows/premerge.yaml | 4 ++++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py
index 08ccfb3d0e3d4..e85c03e29bd2e 100644
--- a/.ci/premerge_advisor_explain.py
+++ b/.ci/premerge_advisor_explain.py
@@ -60,7 +60,7 @@ def main(
        new comment containing the failure information and any possible premerge
        advisor findings.
     3. There has been a previous failure and the job passes - We update the
-       existing comment by passing its ID anda passed message to the
+       existing comment by passing its ID and a passed message to the
        issue-write workflow.
     4. There has been a previous failure and the job fails - We update the
        existing comment in the same manner as above, but generate the comment
diff --git a/.ci/utils.sh b/.ci/utils.sh
index 91c27319f3534..c364f9395d67b 100644
--- a/.ci/utils.sh
+++ b/.ci/utils.sh
@@ -38,7 +38,7 @@ function at-exit {
       $retcode "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log \
       >> $GITHUB_STEP_SUMMARY
     python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \
-      $(git rev-parse HEAD~1) $retcode ${{ secrets.GITHUB_TOKEN }} \
+      $(git rev-parse HEAD~1) $retcode "${GITHUB_TOKEN}" \
       $GITHUB_PR_NUMBER "${BUILD_DIR}"/test-results.*.xml \
       "${MONOREPO_ROOT}"/ninja*.log
   fi
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 973d3abf358ce..7f875f27097f1 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -64,6 +64,8 @@ jobs:
       - name: Build and Test
         timeout-minutes: 120
         continue-on-error: ${{ runner.arch == 'ARM64' }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
         run: |
           git config --global --add safe.directory '*'
 
@@ -153,6 +155,8 @@ jobs:
         timeout-minutes: 180
         if: ${{ steps.vars.outputs.windows-projects != '' }}
         shell: cmd
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
         run: |
           call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64
           # See the comments above in the Linux job for why we define each of