[llvm] [libc++] Switch over to the LLVM-wide premerge runners (PR #141652)

Louis Dionne via llvm-commits llvm-commits at lists.llvm.org
Tue May 27 11:26:38 PDT 2025


https://github.com/ldionne created https://github.com/llvm/llvm-project/pull/141652

The libc++ self-hosted runner infrastructure has been unstable for a while. Instead, switch over to the runners used for the rest of LLVM's premerge testing, which are better supported.

This also allows us to remove the "job restarter" workflow, which isn't needed anymore since the LLVM-wide runners can't get preempted at arbitrary times.

NOTE: This PR is exploratory only. While we would ideally like to move the libc++ runners to the common monorepo infrastructure, there are other questions to answer before we can do that. The goal of this PR is mainly to shake out any obvious technical issues in making that move.

>From 1621b53dd0b308f027a3dd259ddeabadca5916da Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Tue, 27 May 2025 14:21:11 -0400
Subject: [PATCH] [libc++] Switch over to the LLVM-wide premerge runners

The libc++ self-hosted runner infrastructure has been unstable for
a while. Instead, switch over to the runners used for the rest of
LLVM's premerge testing, which are better supported.

This also allows us to remove the "job restarter" workflow, which
isn't needed anymore since the LLVM-wide runners can't get preempted
at arbitrary times.

NOTE: This PR is exploratory only. While we would ideally like to
      move the libc++ runners to the common monorepo infrastructure,
      there are other questions to answer before we can do that. The
      goal of this PR is mainly to shake out any obvious technical
      issues in making that move.
---
 .github/workflows/libcxx-build-and-test.yaml  |  16 +-
 .../libcxx-restart-preempted-jobs.yaml        | 243 ------------------
 2 files changed, 8 insertions(+), 251 deletions(-)
 delete mode 100644 .github/workflows/libcxx-restart-preempted-jobs.yaml

diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml
index d05e6a559e28d..161d2302e3ed4 100644
--- a/.github/workflows/libcxx-build-and-test.yaml
+++ b/.github/workflows/libcxx-build-and-test.yaml
@@ -36,7 +36,7 @@ concurrency:
 jobs:
   stage1:
     if: github.repository_owner == 'llvm'
-    runs-on: libcxx-self-hosted-linux
+    runs-on: llvm-premerge-linux-runners
     container: ghcr.io/llvm/libcxx-linux-builder:b060022103f551d8ca1dad84122ef73927c86512
     continue-on-error: false
     strategy:
@@ -74,7 +74,7 @@ jobs:
             **/crash_diagnostics/*
   stage2:
     if: github.repository_owner == 'llvm'
-    runs-on: libcxx-self-hosted-linux
+    runs-on: llvm-premerge-linux-runners
     container: ghcr.io/llvm/libcxx-linux-builder:2b57ebb50b6d418e70382e655feaa619b558e254
     needs: [ stage1 ]
     continue-on-error: false
@@ -149,19 +149,19 @@ jobs:
           'generic-static',
           'bootstrapping-build'
         ]
-        machine: [ 'libcxx-self-hosted-linux' ]
+        machine: [ 'llvm-premerge-linux-runners' ]
         include:
         - config: 'generic-cxx26'
-          machine: libcxx-self-hosted-linux
+          machine: llvm-premerge-linux-runners
         - config: 'generic-asan'
-          machine: libcxx-self-hosted-linux
+          machine: llvm-premerge-linux-runners
         - config: 'generic-tsan'
-          machine: libcxx-self-hosted-linux
+          machine: llvm-premerge-linux-runners
         - config: 'generic-ubsan'
-          machine: libcxx-self-hosted-linux
+          machine: llvm-premerge-linux-runners
         # Use a larger machine for MSAN to avoid timeout and memory allocation issues.
         - config: 'generic-msan'
-          machine: libcxx-self-hosted-linux
+          machine: llvm-premerge-linux-runners
     runs-on: ${{ matrix.machine }}
     container: ghcr.io/llvm/libcxx-linux-builder:2b57ebb50b6d418e70382e655feaa619b558e254
     steps:
diff --git a/.github/workflows/libcxx-restart-preempted-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml
deleted file mode 100644
index 7b341d7f22e41..0000000000000
--- a/.github/workflows/libcxx-restart-preempted-jobs.yaml
+++ /dev/null
@@ -1,243 +0,0 @@
-name: Restart Preempted Libc++ Workflow
-
-# The libc++ builders run on preemptable VMs, which can be shutdown at any time.
-# This workflow identifies when a workflow run was canceled due to the VM being preempted,
-# and restarts the workflow run.
-
-# We identify a canceled workflow run by checking the annotations of the check runs in the check suite,
-# which should contain the message "The runner has received a shutdown signal."
-
-# Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow.
-
-on:
-  workflow_run:
-    workflows: [Build and Test libc\+\+]
-    types:
-      - completed
-
-permissions:
-  contents: read
-
-jobs:
-  restart:
-    if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled')
-    name: "Restart Job"
-    permissions:
-      statuses: read
-      checks: write
-      actions: write
-    runs-on: ubuntu-24.04
-    steps:
-      - name: "Restart Job"
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
-        with:
-          script: |
-            const failure_regex = /Process completed with exit code 1./
-            const preemption_regex = /The runner has received a shutdown signal/
-
-            const wf_run = context.payload.workflow_run
-            core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
-
-
-            async function create_check_run(conclusion, message) {
-                // Create a check run on the given workflow run to indicate if
-                // we are restarting the workflow or not.
-                if (conclusion != 'success' && conclusion != 'skipped' && conclusion != 'neutral') {
-                  core.setFailed('Invalid conclusion: ' + conclusion)
-                }
-                await github.rest.checks.create({
-                    owner: context.repo.owner,
-                    repo: context.repo.repo,
-                    name: 'Restart Preempted Job',
-                    head_sha: wf_run.head_sha,
-                    status: 'completed',
-                    conclusion: conclusion,
-                    output: {
-                      title: 'Restarted Preempted Job',
-                      summary: message
-                    }
-                })
-            }
-
-            console.log('Listing check runs for suite')
-            const check_suites = await github.rest.checks.listForSuite({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              check_suite_id: context.payload.workflow_run.check_suite_id,
-              per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
-            })
-
-            check_run_ids = [];
-            for (check_run of check_suites.data.check_runs) {
-              console.log('Checking check run: ' + check_run.id);
-              if (check_run.status != 'completed') {
-                console.log('Check run was not completed. Skipping.');
-                continue;
-              }
-              if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
-                console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.');
-                continue;
-              }
-              check_run_ids.push(check_run.id);
-            }
-
-            has_preempted_job = false;
-
-            for (check_run_id of check_run_ids) {
-              console.log('Listing annotations for check run: ' + check_run_id);
-
-              annotations = await github.rest.checks.listAnnotations({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                check_run_id: check_run_id
-              })
-
-              // For temporary debugging purposes to see the structure of the annotations.
-              console.log(annotations);
-
-              has_failed_job = false;
-              saved_failure_message = null;
-
-              for (annotation of annotations.data) {
-                if (annotation.annotation_level != 'failure') {
-                  continue;
-                }
-
-                const preemption_match = annotation.message.match(preemption_regex);
-
-                if (preemption_match != null) {
-                  console.log('Found preemption message: ' + annotation.message);
-                  has_preempted_job = true;
-                }
-
-                const failure_match = annotation.message.match(failure_regex);
-                if (failure_match != null) {
-                  has_failed_job = true;
-                  saved_failure_message = annotation.message;
-                }
-              }
-              if (has_failed_job && (! has_preempted_job)) {
-                // We only want to restart the workflow if all of the failures were due to preemption.
-                // We don't want to restart the workflow if there were other failures.
-                //
-                // However, libcxx runners running inside docker containers produce both a preemption message and failure message.
-                //
-                // The desired approach is to ignore failure messages which appear on the same job as a preemption message
-                // (An job is a single run with a specific configuration, ex generic-gcc, gcc-14).
-                //
-                // However, it's unclear that this code achieves the desired approach, and it may ignore all failures
-                // if a preemption message is found at all on any run.
-                //
-                // For now, it's more important to restart preempted workflows than to avoid restarting workflows with
-                // non-preemption failures.
-                //
-                // TODO Figure this out.
-                core.notice('Choosing not to rerun workflow because we found a non-preemption failure' +
-                  'Failure message: "' + saved_failure_message + '"');
-                await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
-                    + 'Failure message: ' + saved_failure_message)
-                return;
-              }
-            }
-
-            if (!has_preempted_job) {
-              core.notice('No preempted jobs found. Not restarting workflow.');
-              await create_check_run('neutral', 'No preempted jobs found. Not restarting workflow.')
-              return;
-            }
-
-            core.notice("Restarted workflow: " + context.payload.workflow_run.id);
-            await github.rest.actions.reRunWorkflowFailedJobs({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                run_id: context.payload.workflow_run.id
-              })
-            await create_check_run('success', 'Restarted workflow run due to preempted job')
-
-  restart-test:
-    if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') && github.event.actor.login == 'ldionne' # TESTING ONLY
-    name: "Restart Job (test)"
-    permissions:
-      statuses: read
-      checks: write
-      actions: write
-    runs-on: ubuntu-24.04
-    steps:
-      - name: "Restart Job (test)"
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
-        with:
-          script: |
-            const FAILURE_REGEX = /Process completed with exit code 1./
-            const PREEMPTION_REGEX = /(The runner has received a shutdown signal)|(The operation was canceled)/
-
-            function log(msg) {
-              core.notice(msg)
-            }
-
-            const wf_run = context.payload.workflow_run
-            log(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
-
-            log('Listing check runs for suite')
-            const check_suites = await github.rest.checks.listForSuite({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              check_suite_id: context.payload.workflow_run.check_suite_id,
-              per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
-            })
-
-            preemptions = [];
-            legitimate_failures = [];
-            for (check_run of check_suites.data.check_runs) {
-              log(`Checking check run: ${check_run.id}`);
-              if (check_run.status != 'completed') {
-                log('Check run was not completed. Skipping.');
-                continue;
-              }
-
-              if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
-                log(`Check run had conclusion: ${check_run.conclusion}. Skipping.`);
-                continue;
-              }
-
-              annotations = await github.rest.checks.listAnnotations({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                check_run_id: check_run.id
-              })
-
-              preemption_annotation = annotations.data.find(function(annotation) {
-                return annotation.annotation_level == 'failure' &&
-                       annotation.message.match(PREEMPTION_REGEX) != null;
-              });
-              if (preemption_annotation != null) {
-                log(`Found preemption message: ${preemption_annotation.message}`);
-                preemptions.push(check_run);
-                break;
-              }
-
-              failure_annotation = annotations.data.find(function(annotation) {
-                return annotation.annotation_level == 'failure' &&
-                       annotation.message.match(FAILURE_REGEX) != null;
-              });
-              if (failure_annotation != null) {
-                log(`Found legitimate failure annotation: ${failure_annotation.message}`);
-                legitimate_failures.push(check_run);
-                break;
-              }
-            }
-
-            if (preemptions) {
-              log('Found some preempted jobs');
-              if (legitimate_failures) {
-                log('Also found some legitimate failures, so not restarting the workflow.');
-              } else {
-                log('Did not find any legitimate failures. Restarting workflow.');
-                await github.rest.actions.reRunWorkflowFailedJobs({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  run_id: context.payload.workflow_run.id
-                })
-              }
-            } else {
-              log('Did not find any preempted jobs. Not restarting the workflow.');
-            }



More information about the llvm-commits mailing list