[llvm] [ci] Handle the case where all reported tests pass but the build is still a failure (PR #120264)
David Spickett via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 08:40:34 PST 2024
https://github.com/DavidSpickett created https://github.com/llvm/llvm-project/pull/120264
In this build:
https://buildkite.com/llvm-project/github-pull-requests/builds/126961
The builds actually failed, probably because a prerequisite of a test suite failed to build.
However they still ran other tests and all those passed. This meant that the test reports were green even though the build was red. On some level this is technically correct, but it is very misleading in practice.
So I've also passed the build script's return code, as it was when we entered the on exit handler, to the generator, so that when this happens again, the report will draw the viewer's attention to the overall failure. There will be a link in the report to the build's log file, so the next step to investigate is clear.
It would be nice to say "tests failed and there was some other build error", but we cannot tell what the non-zero return code was caused by. Could be either.
From 4d26038f4d2c3bfd0769271a736044bb0585082f Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Tue, 17 Dec 2024 16:25:34 +0000
Subject: [PATCH] [ci] Handle the case where all reported tests pass but the
build is still a failure
In this build:
https://buildkite.com/llvm-project/github-pull-requests/builds/126961
The builds actually failed, probably because a prerequisite of a test suite
failed to build.
However they still ran other tests and all those passed. This meant that
the test reports were green even though the build was red. On some level
this is technically correct, but it's very misleading in practice.
So I've also passed the build script's return code, as it was when
we entered the on exit handler, to the generator.
So that when this happens again, the report will not list any test failures
but will draw the viewer's attention to the overall failure.
There will be a link in the report to the build's log file, so the
next step to investigate is clear.
It would be nice to say "tests failed and there was some other build error",
but we cannot tell what the non-zero return code was caused by. Could
be either.
---
.ci/generate_test_report.py | 102 +++++++++++++++++++++++++++++++-----
.ci/monolithic-linux.sh | 4 +-
.ci/monolithic-windows.sh | 4 +-
3 files changed, 94 insertions(+), 16 deletions(-)
diff --git a/.ci/generate_test_report.py b/.ci/generate_test_report.py
index ff601a0cde1063..6f2137e7803bb2 100644
--- a/.ci/generate_test_report.py
+++ b/.ci/generate_test_report.py
@@ -19,12 +19,13 @@ def junit_from_xml(xml):
class TestReports(unittest.TestCase):
def test_title_only(self):
- self.assertEqual(_generate_report("Foo", []), ("", "success"))
+ self.assertEqual(_generate_report("Foo", 0, []), ("", "success"))
def test_no_tests_in_testsuite(self):
self.assertEqual(
_generate_report(
"Foo",
+ 1,
[
junit_from_xml(
dedent(
@@ -45,6 +46,7 @@ def test_no_failures(self):
self.assertEqual(
_generate_report(
"Foo",
+ 0,
[
junit_from_xml(
dedent(
@@ -70,10 +72,51 @@ def test_no_failures(self):
),
)
+ def test_no_failures_build_failed(self):
+ self.assertEqual(
+ _generate_report(
+ "Foo",
+ 1,
+ [
+ junit_from_xml(
+ dedent(
+ """\
+ <?xml version="1.0" encoding="UTF-8"?>
+ <testsuites time="0.00">
+ <testsuite name="Passed" tests="1" failures="0" skipped="0" time="0.00">
+ <testcase classname="Bar/test_1" name="test_1" time="0.00"/>
+ </testsuite>
+ </testsuites>"""
+ )
+ )
+ ],
+ buildkite_info={
+ "BUILDKITE_ORGANIZATION_SLUG": "organization_slug",
+ "BUILDKITE_PIPELINE_SLUG": "pipeline_slug",
+ "BUILDKITE_BUILD_NUMBER": "build_number",
+ "BUILDKITE_JOB_ID": "job_id",
+ },
+ ),
+ (
+ dedent(
+ """\
+ # Foo
+
+ * 1 test passed
+
+ All tests passed but another part of the build **failed**.
+
+ [Download](https://buildkite.com/organizations/organization_slug/pipelines/pipeline_slug/builds/build_number/jobs/job_id/download.txt) the build's log file to see the details."""
+ ),
+ "error",
+ ),
+ )
+
def test_report_single_file_single_testsuite(self):
self.assertEqual(
_generate_report(
"Foo",
+ 1,
[
junit_from_xml(
dedent(
@@ -166,6 +209,7 @@ def test_report_single_file_multiple_testsuites(self):
self.assertEqual(
_generate_report(
"ABC and DEF",
+ 1,
[
junit_from_xml(
dedent(
@@ -198,6 +242,7 @@ def test_report_multiple_files_multiple_testsuites(self):
self.assertEqual(
_generate_report(
"ABC and DEF",
+ 1,
[
junit_from_xml(
dedent(
@@ -238,6 +283,7 @@ def test_report_dont_list_failures(self):
self.assertEqual(
_generate_report(
"Foo",
+ 1,
[
junit_from_xml(
dedent(
@@ -272,6 +318,7 @@ def test_report_dont_list_failures_link_to_log(self):
self.assertEqual(
_generate_report(
"Foo",
+ 1,
[
junit_from_xml(
dedent(
@@ -312,6 +359,7 @@ def test_report_size_limit(self):
self.assertEqual(
_generate_report(
"Foo",
+ 1,
[
junit_from_xml(
dedent(
@@ -351,12 +399,18 @@ def test_report_size_limit(self):
# and output will not be.
def _generate_report(
title,
+ return_code,
junit_objects,
size_limit=1024 * 1024,
list_failures=True,
buildkite_info=None,
):
if not junit_objects:
+ # Note that we do not post an empty report, therefore we can ignore a
+ # non-zero return code in situations like this.
+ #
+ # If we were going to post a report, then yes, it would be misleading
+ # to say we succeeded when the final return code was non-zero.
return ("", "success")
failures = {}
@@ -385,7 +439,11 @@ def _generate_report(
if not tests_run:
return ("", None)
- style = "error" if tests_failed else "success"
+ style = "success"
+ # Either tests failed, or all tests passed but something failed to build.
+ if tests_failed or return_code != 0:
+ style = "error"
+
report = [f"# {title}", ""]
tests_passed = tests_run - tests_skipped - tests_failed
@@ -400,17 +458,17 @@ def plural(num_tests):
if tests_failed:
report.append(f"* {tests_failed} {plural(tests_failed)} failed")
- if not list_failures:
- if buildkite_info is not None:
- log_url = (
- "https://buildkite.com/organizations/{BUILDKITE_ORGANIZATION_SLUG}/"
- "pipelines/{BUILDKITE_PIPELINE_SLUG}/builds/{BUILDKITE_BUILD_NUMBER}/"
- "jobs/{BUILDKITE_JOB_ID}/download.txt".format(**buildkite_info)
- )
- download_text = f"[Download]({log_url})"
- else:
- download_text = "Download"
+ if buildkite_info is not None:
+ log_url = (
+ "https://buildkite.com/organizations/{BUILDKITE_ORGANIZATION_SLUG}/"
+ "pipelines/{BUILDKITE_PIPELINE_SLUG}/builds/{BUILDKITE_BUILD_NUMBER}/"
+ "jobs/{BUILDKITE_JOB_ID}/download.txt".format(**buildkite_info)
+ )
+ download_text = f"[Download]({log_url})"
+ else:
+ download_text = "Download"
+ if not list_failures:
report.extend(
[
"",
@@ -435,11 +493,23 @@ def plural(num_tests):
"</details>",
]
)
+ elif return_code != 0:
+ # No tests failed but the build was in a failed state. Bring this to the user's
+ # attention.
+ report.extend(
+ [
+ "",
+ "All tests passed but another part of the build **failed**.",
+ "",
+ f"{download_text} the build's log file to see the details.",
+ ]
+ )
report = "\n".join(report)
if len(report.encode("utf-8")) > size_limit:
return _generate_report(
title,
+ return_code,
junit_objects,
size_limit,
list_failures=False,
@@ -449,9 +519,10 @@ def plural(num_tests):
return report, style
-def generate_report(title, junit_files, buildkite_info):
+def generate_report(title, return_code, junit_files, buildkite_info):
return _generate_report(
title,
+ return_code,
[JUnitXml.fromfile(p) for p in junit_files],
buildkite_info=buildkite_info,
)
@@ -463,6 +534,7 @@ def generate_report(title, junit_files, buildkite_info):
"title", help="Title of the test report, without Markdown formatting."
)
parser.add_argument("context", help="Annotation context to write to.")
+ parser.add_argument("return_code", help="The build's return code.", type=int)
parser.add_argument("junit_files", help="Paths to JUnit report files.", nargs="*")
args = parser.parse_args()
@@ -477,7 +549,9 @@ def generate_report(title, junit_files, buildkite_info):
if len(buildkite_info) != len(env_var_names):
buildkite_info = None
- report, style = generate_report(args.title, args.junit_files, buildkite_info)
+ report, style = generate_report(
+ args.title, args.return_code, args.junit_files, buildkite_info
+ )
if report:
p = subprocess.Popen(
diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh
index 4bfebd5f752795..55741bc8310469 100755
--- a/.ci/monolithic-linux.sh
+++ b/.ci/monolithic-linux.sh
@@ -29,6 +29,8 @@ if [[ -n "${CLEAR_CACHE:-}" ]]; then
fi
function at-exit {
+ retcode=$?
+
mkdir -p artifacts
ccache --print-stats > artifacts/ccache_stats.txt
@@ -37,7 +39,7 @@ function at-exit {
if command -v buildkite-agent 2>&1 >/dev/null
then
python3 "${MONOREPO_ROOT}"/.ci/generate_test_report.py ":linux: Linux x64 Test Results" \
- "linux-x64-test-results" "${BUILD_DIR}"/test-results.*.xml
+ "linux-x64-test-results" $retcode "${BUILD_DIR}"/test-results.*.xml
fi
}
trap at-exit EXIT
diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh
index 25cdd2f419f479..68303a3ea153a9 100755
--- a/.ci/monolithic-windows.sh
+++ b/.ci/monolithic-windows.sh
@@ -28,6 +28,8 @@ fi
sccache --zero-stats
function at-exit {
+ retcode=$?
+
mkdir -p artifacts
sccache --show-stats >> artifacts/sccache_stats.txt
@@ -36,7 +38,7 @@ function at-exit {
if command -v buildkite-agent 2>&1 >/dev/null
then
python "${MONOREPO_ROOT}"/.ci/generate_test_report.py ":windows: Windows x64 Test Results" \
- "windows-x64-test-results" "${BUILD_DIR}"/test-results.*.xml
+ "windows-x64-test-results" $retcode "${BUILD_DIR}"/test-results.*.xml
fi
}
trap at-exit EXIT
More information about the llvm-commits
mailing list