[llvm] [CI] Rework github workflow processing (PR #130317)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 8 01:09:46 PST 2025
Nathan Gauër <brioche at google.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/130317 at github.com>
================
@@ -45,131 +65,116 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
Returns a list of GaugeMetric objects, containing the relevant metrics about
the workflow
"""
- queued_job_counts = {}
- running_job_counts = {}
-
- # Other states are available (pending, waiting, etc), but the meaning
- # is not documented (See #70540).
- # "queued" seems to be the info we want.
- for queued_workflow in github_repo.get_workflow_runs(status="queued"):
- if queued_workflow.name not in WORKFLOWS_TO_TRACK:
- continue
- for queued_workflow_job in queued_workflow.jobs():
- job_name = queued_workflow_job.name
- # Workflows marked as queued can potentially only have some jobs
- # queued, so make sure to also count jobs currently in progress.
- if queued_workflow_job.status == "queued":
- if job_name not in queued_job_counts:
- queued_job_counts[job_name] = 1
- else:
- queued_job_counts[job_name] += 1
- elif queued_workflow_job.status == "in_progress":
- if job_name not in running_job_counts:
- running_job_counts[job_name] = 1
- else:
- running_job_counts[job_name] += 1
-
- for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
- if running_workflow.name not in WORKFLOWS_TO_TRACK:
+ queued_count = collections.Counter()
+ running_count = collections.Counter()
+
+ # Do not apply any filters to this query.
+ # See https://github.com/orgs/community/discussions/86766
+ # Applying filters like `status=completed` will break pagination, and
+ # return a non-sorted and incomplete list of workflows.
+ i = 0
+ for task in iter(github_repo.get_workflow_runs()):
+ if i > GITHUB_WORKFLOWS_COUNT_FOR_SAMPLING:
+ break
+ i += 1
+
+ if task.name not in GITHUB_WORKFLOW_TO_TRACK:
continue
- for running_workflow_job in running_workflow.jobs():
- job_name = running_workflow_job.name
- if running_workflow_job.status != "in_progress":
+
+ prefix_name = GITHUB_WORKFLOW_TO_TRACK[task.name]
+ for job in task.jobs():
+ if job.name not in GITHUB_JOB_TO_TRACK[prefix_name]:
continue
+ suffix_name = GITHUB_JOB_TO_TRACK[prefix_name][job.name]
+ metric_name = f"{prefix_name}_{suffix_name}"
- if job_name not in running_job_counts:
- running_job_counts[job_name] = 1
- else:
- running_job_counts[job_name] += 1
+ # Other states are available (pending, waiting, etc), but the meaning
+ # is not documented (See #70540).
+ # "queued" seems to be the info we want.
+ if job.status == "queued":
+ queued_count[metric_name] += 1
+ elif job.status == "in_progress":
+ running_count[metric_name] += 1
workflow_metrics = []
- for queued_job in queued_job_counts:
+ for name, value in queued_count.items():
workflow_metrics.append(
- GaugeMetric(
- f"workflow_queue_size_{queued_job}",
- queued_job_counts[queued_job],
- time.time_ns(),
- )
+ GaugeMetric(f"workflow_queue_size_{name}", value, time.time_ns())
)
- for running_job in running_job_counts:
+ for name, value in running_count.items():
workflow_metrics.append(
- GaugeMetric(
- f"running_workflow_count_{running_job}",
- running_job_counts[running_job],
- time.time_ns(),
- )
+ GaugeMetric(f"running_workflow_count_{name}", value, time.time_ns())
)
+
# Always send a heartbeat metric so we can monitor whether this container is still able to log to Grafana.
workflow_metrics.append(
GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
)
return workflow_metrics
-def get_per_workflow_metrics(
- github_repo: github.Repository, workflows_to_track: dict[str, int]
-):
+def get_per_workflow_metrics(github_repo: github.Repository, last_seen_workflow: str):
"""Gets the metrics for specified Github workflows.
This function takes in a list of workflows to track, and optionally the
workflow ID of the last tracked invocation. It grabs the relevant data
from Github, returning it to the caller.
+ If the last_seen_workflow parameter is None, this returns no metrics, but
+ returns the id of the most recent workflow.
Args:
github_repo: A github repo object to use to query the relevant information.
- workflows_to_track: A dictionary mapping workflow names to the last
- invocation ID where metrics have been collected, or None to collect the
- last five results.
+ last_seen_workflow: the last workflow this function processed.
Returns:
- Returns a list of JobMetrics objects, containing the relevant metrics about
- the workflow.
+ Returns a tuple with 2 elements:
----------------
boomanaiden154 wrote:
Can you add this as a type annotation to the function too?
https://github.com/llvm/llvm-project/pull/130317
More information about the llvm-commits
mailing list