[llvm] [CI] Rework github workflow processing (PR #130317)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 8 01:09:46 PST 2025


Nathan Gauër <brioche at google.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/130317 at github.com>


================
@@ -45,131 +65,116 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
       Returns a list of GaugeMetric objects, containing the relevant metrics about
       the workflow
     """
-    queued_job_counts = {}
-    running_job_counts = {}
-
-    # Other states are available (pending, waiting, etc), but the meaning
-    # is not documented (See #70540).
-    # "queued" seems to be the info we want.
-    for queued_workflow in github_repo.get_workflow_runs(status="queued"):
-        if queued_workflow.name not in WORKFLOWS_TO_TRACK:
-            continue
-        for queued_workflow_job in queued_workflow.jobs():
-            job_name = queued_workflow_job.name
-            # Workflows marked as queued can potentially only have some jobs
-            # queued, so make sure to also count jobs currently in progress.
-            if queued_workflow_job.status == "queued":
-                if job_name not in queued_job_counts:
-                    queued_job_counts[job_name] = 1
-                else:
-                    queued_job_counts[job_name] += 1
-            elif queued_workflow_job.status == "in_progress":
-                if job_name not in running_job_counts:
-                    running_job_counts[job_name] = 1
-                else:
-                    running_job_counts[job_name] += 1
-
-    for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
-        if running_workflow.name not in WORKFLOWS_TO_TRACK:
+    queued_count = collections.Counter()
+    running_count = collections.Counter()
+
+    # Do not apply any filters to this query.
+    # See https://github.com/orgs/community/discussions/86766
+    # Applying filters like `status=completed` will break pagination, and
+    # return a non-sorted and incomplete list of workflows.
+    i = 0
+    for task in iter(github_repo.get_workflow_runs()):
+        if i > GITHUB_WORKFLOWS_COUNT_FOR_SAMPLING:
+            break
+        i += 1
+
+        if task.name not in GITHUB_WORKFLOW_TO_TRACK:
             continue
-        for running_workflow_job in running_workflow.jobs():
-            job_name = running_workflow_job.name
-            if running_workflow_job.status != "in_progress":
+
+        prefix_name = GITHUB_WORKFLOW_TO_TRACK[task.name]
+        for job in task.jobs():
+            if job.name not in GITHUB_JOB_TO_TRACK[prefix_name]:
                 continue
+            suffix_name = GITHUB_JOB_TO_TRACK[prefix_name][job.name]
+            metric_name = f"{prefix_name}_{suffix_name}"
 
-            if job_name not in running_job_counts:
-                running_job_counts[job_name] = 1
-            else:
-                running_job_counts[job_name] += 1
+            # Other states are available (pending, waiting, etc), but the meaning
+            # is not documented (See #70540).
+            # "queued" seems to be the info we want.
+            if job.status == "queued":
+                queued_count[metric_name] += 1
+            elif job.status == "in_progress":
+                running_count[metric_name] += 1
 
     workflow_metrics = []
-    for queued_job in queued_job_counts:
+    for name, value in queued_count.items():
         workflow_metrics.append(
-            GaugeMetric(
-                f"workflow_queue_size_{queued_job}",
-                queued_job_counts[queued_job],
-                time.time_ns(),
-            )
+            GaugeMetric(f"workflow_queue_size_{name}", value, time.time_ns())
         )
-    for running_job in running_job_counts:
+    for name, value in running_count.items():
         workflow_metrics.append(
-            GaugeMetric(
-                f"running_workflow_count_{running_job}",
-                running_job_counts[running_job],
-                time.time_ns(),
-            )
+            GaugeMetric(f"running_workflow_count_{name}", value, time.time_ns())
         )
+
     # Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
     workflow_metrics.append(
         GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
     )
     return workflow_metrics
 
 
-def get_per_workflow_metrics(
-    github_repo: github.Repository, workflows_to_track: dict[str, int]
-):
+def get_per_workflow_metrics(github_repo: github.Repository, last_seen_workflow: str):
     """Gets the metrics for specified Github workflows.
 
     This function takes in a list of workflows to track, and optionally the
     workflow ID of the last tracked invocation. It grabs the relevant data
     from Github, returning it to the caller.
+    If the last_seen_workflow parameter is None, this returns no metrics, but
+    returns the id of the most recent workflow.
 
     Args:
       github_repo: A github repo object to use to query the relevant information.
-      workflows_to_track: A dictionary mapping workflow names to the last
-        invocation ID where metrics have been collected, or None to collect the
-        last five results.
+      last_seen_workflow: the last workflow this function processed.
 
     Returns:
-      Returns a list of JobMetrics objects, containing the relevant metrics about
-      the workflow.
+      Returns a tuple with 2 elements:
----------------
boomanaiden154 wrote:

Can you add this as a type annotation to the function too?

https://github.com/llvm/llvm-project/pull/130317


More information about the llvm-commits mailing list