[llvm] [CI] Track Queue/In Progress Metrics By Job Rather Than Workflow (PR #127274)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 14 19:07:57 PST 2025


https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/127274

>From 38d3ff227b9caf9cf062c979417605c52dff2a0d Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 14 Feb 2025 23:31:39 +0000
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 .ci/metrics/metrics.py | 67 ++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py
index 70b787665a8b9..d05444bb9ff80 100644
--- a/.ci/metrics/metrics.py
+++ b/.ci/metrics/metrics.py
@@ -47,36 +47,51 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
     # Other states are available (pending, waiting, etc), but the meaning
     # is not documented (See #70540).
     # "queued" seems to be the info we want.
-    queued_workflow_count = len(
-        [
-            x
-            for x in github_repo.get_workflow_runs(status="queued")
-            if x.name in WORKFLOWS_TO_TRACK
-        ]
-    )
-    running_workflow_count = len(
-        [
-            x
-            for x in github_repo.get_workflow_runs(status="in_progress")
-            if x.name in WORKFLOWS_TO_TRACK
-        ]
-    )
+    queued_job_counts = {}
+    for queued_workflow in github_repo.get_workflow_runs(status="queued"):
+        if queued_workflow.name not in WORKFLOWS_TO_TRACK:
+            continue
+        for queued_workflow_job in queued_workflow.jobs():
+            job_name = queued_workflow_job.name
+            if queued_workflow_job.status != "queued":
+                continue
+
+            if job_name not in queued_job_counts:
+                queued_job_counts[job_name] = 1
+            else:
+                queued_job_counts[job_name] += 1
+
+    running_job_counts = {}
+    for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
+        if running_workflow.name not in WORKFLOWS_TO_TRACK:
+            continue
+        for running_workflow_job in running_workflow.jobs():
+            job_name = running_workflow_job.name
+            if running_workflow_job.status != "in_progress":
+                continue
+
+            if job_name not in running_job_counts:
+                running_job_counts[job_name] = 1
+            else:
+                running_job_counts[job_name] += 1
 
     workflow_metrics = []
-    workflow_metrics.append(
-        GaugeMetric(
-            "workflow_queue_size",
-            queued_workflow_count,
-            time.time_ns(),
+    for queued_job in queued_job_counts:
+        workflow_metrics.append(
+            GaugeMetric(
+                f"workflow_queue_size_{queued_job}",
+                queued_job_counts[queued_job],
+                time.time_ns(),
+            )
         )
-    )
-    workflow_metrics.append(
-        GaugeMetric(
-            "running_workflow_count",
-            running_workflow_count,
-            time.time_ns(),
+    for running_job in running_job_counts:
+        workflow_metrics.append(
+            GaugeMetric(
+                f"running_workflow_count_{running_job}",
+                running_job_counts[running_job],
+                time.time_ns(),
+            )
         )
-    )
     # Always send a hearbeat metric so we can monitor is this container is still able to log to Grafana.
     workflow_metrics.append(
         GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())

>From 25b31040f2e68c0cada5b97b08418d949cdfa788 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Sat, 15 Feb 2025 03:07:47 +0000
Subject: [PATCH 2/2] Fix edge case

Created using spr 1.3.4
---
 .ci/metrics/metrics.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py
index d05444bb9ff80..5347ce6796697 100644
--- a/.ci/metrics/metrics.py
+++ b/.ci/metrics/metrics.py
@@ -43,25 +43,30 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
       Returns a list of GaugeMetric objects, containing the relevant metrics about
       the workflow
     """
+    queued_job_counts = {}
+    running_job_counts = {}
 
     # Other states are available (pending, waiting, etc), but the meaning
     # is not documented (See #70540).
     # "queued" seems to be the info we want.
-    queued_job_counts = {}
     for queued_workflow in github_repo.get_workflow_runs(status="queued"):
         if queued_workflow.name not in WORKFLOWS_TO_TRACK:
             continue
         for queued_workflow_job in queued_workflow.jobs():
             job_name = queued_workflow_job.name
-            if queued_workflow_job.status != "queued":
-                continue
+            # Workflows marked as queued can potentially only have some jobs
+            # queued, so make sure to also count jobs currently in progress.
+            if queued_workflow_job.status == "queued":
+                if job_name not in queued_job_counts:
+                    queued_job_counts[job_name] = 1
+                else:
+                    queued_job_counts[job_name] += 1
+            elif queued_workflow_job.status == "in_progress":
+                if job_name not in running_job_counts:
+                    running_job_counts[job_name] = 1
+                else:
+                    running_job_counts[job_name] += 1
 
-            if job_name not in queued_job_counts:
-                queued_job_counts[job_name] = 1
-            else:
-                queued_job_counts[job_name] += 1
-
-    running_job_counts = {}
     for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
         if running_workflow.name not in WORKFLOWS_TO_TRACK:
             continue



More information about the llvm-commits mailing list