[llvm] [CI] Upstream metrics script and container definition (PR #117461)
Nathan Gauër via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 04:49:13 PST 2024
================
@@ -0,0 +1,180 @@
+import requests
+import time
+import os
+from dataclasses import dataclass
+
+from github import Github
+from github import Auth
+
+GRAFANA_URL = (
+ "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
+)
+GITHUB_PROJECT = "llvm/llvm-project"
+WORKFLOWS_TO_TRACK = ["Check code formatting"]
+
+
@dataclass
class JobMetrics:
    """Metrics for one completed workflow job, shaped for the Influx line protocol.

    Note: the mail archiver mangled the `@dataclass` decorator into ` at dataclass`
    in the posted diff; restored here.
    """

    # Workflow name as reported by Github (e.g. "Check code formatting").
    job_name: str
    # Seconds the job waited in the queue before starting.
    queue_time: int
    # Seconds the job spent executing.
    run_time: int
    # 1 if the job concluded with "success", 0 otherwise.
    status: int
    # Job creation time as a Unix timestamp in nanoseconds.
    created_at_ns: int
    # Github ID of the workflow run this job belongs to.
    workflow_id: int
+
+
def get_metrics(github_repo, workflows_to_track):
    """Gets the metrics for specified Github workflows.

    Walks the repository's workflow runs from newest to oldest, collecting a
    JobMetrics entry for every completed run of a tracked workflow that has
    not been reported yet.

    Args:
      github_repo: A github repo object to use to query the relevant information.
      workflows_to_track: A dictionary mapping workflow names to the run ID of
        the last invocation where metrics have been collected, or None to
        collect only the most recent completed run of that workflow.

    Returns:
      Returns a list of JobMetrics objects, containing the relevant metrics about
      the workflow.
    """
    workflow_runs = iter(github_repo.get_workflow_runs())

    workflow_metrics = []

    # Per-workflow flag: True while we still need data for that workflow,
    # flipped to False once we have caught up with it.
    workflows_to_include = {name: True for name in workflows_to_track}
    workflows_left_to_include = len(workflows_to_track)

    while workflows_left_to_include > 0:
        # Use the default form of next() so exhausting the run list (e.g. a
        # tracked workflow that never ran) ends the scan instead of letting
        # StopIteration escape to the caller.
        workflow_run = next(workflow_runs, None)
        if workflow_run is None:
            break
        if workflow_run.status != "completed":
            continue

        # Skip workflows we are not tracking at all.
        if workflow_run.name not in workflows_to_include:
            continue
        # Skip workflows we have already caught up with during this scan.
        if not workflows_to_include[workflow_run.name]:
            continue

        workflow_jobs = workflow_run.jobs()
        if workflow_jobs.totalCount == 0:
            continue
        if workflow_jobs.totalCount > 1:
            raise ValueError(
                f"Encountered an unexpected number of jobs: {workflow_jobs.totalCount}"
            )

        job = workflow_jobs[0]
        created_at = job.created_at
        started_at = job.started_at
        completed_at = job.completed_at

        # 1 on success, 0 on any other conclusion (failure, cancelled, ...).
        job_result = int(job.conclusion == "success")

        queue_time = started_at - created_at
        run_time = completed_at - started_at

        # Skip degenerate runs. Use total_seconds(): timedelta.seconds only
        # holds the sub-day remainder and ignores the days component.
        if run_time.total_seconds() == 0:
            continue

        last_seen_id = workflows_to_track[workflow_run.name]
        if last_seen_id is None or last_seen_id == workflow_run.id:
            workflows_left_to_include -= 1
            workflows_to_include[workflow_run.name] = False
            if last_seen_id is not None:
                # This run was already uploaded on a previous pass; don't
                # report it a second time.
                continue

        # Grafana's Influx write endpoint expects nanosecond timestamps.
        created_at_ns = int(created_at.timestamp()) * 10**9

        workflow_metrics.append(
            JobMetrics(
                workflow_run.name,
                int(queue_time.total_seconds()),
                int(run_time.total_seconds()),
                job_result,
                created_at_ns,
                workflow_run.id,
            )
        )

    return workflow_metrics
+
+
def upload_metrics(workflow_metrics, metrics_userid, api_key):
    """Upload metrics to Grafana.

    Serializes every metric into one Influx line-protocol record and pushes
    the whole batch to Grafana in a single REST request.

    Args:
      workflow_metrics: A list of metrics to upload to Grafana.
      metrics_userid: The userid to use for the upload.
      api_key: The API key to use for the upload.
    """
    lines = []
    for metric in workflow_metrics:
        # Influx measurement names: lowercase, underscores instead of spaces.
        measurement = metric.job_name.lower().replace(" ", "_")
        lines.append(
            f"{measurement} queue_time={metric.queue_time},run_time={metric.run_time},status={metric.status} {metric.created_at_ns}"
        )

    response = requests.post(
        GRAFANA_URL,
        headers={"Content-Type": "text/plain"},
        data="\n".join(lines),
        auth=(metrics_userid, api_key),
    )

    # Anything outside the 2xx range is treated as a failed submission.
    if not (200 <= response.status_code < 300):
        print(f"Failed to submit data to Grafana: {response.status_code}")
+
+
def main():
    """Scrape Github workflow metrics on a fixed interval and push them to Grafana.

    Reads GITHUB_TOKEN, GRAFANA_API_KEY and GRAFANA_METRICS_USERID from the
    environment; runs forever.
    """
    # Authenticate with Github
    auth = Auth.Token(os.environ["GITHUB_TOKEN"])
    github_object = Github(auth=auth)
    # Use the module-level constant instead of re-hardcoding the repo name.
    github_repo = github_object.get_repo(GITHUB_PROJECT)

    grafana_api_key = os.environ["GRAFANA_API_KEY"]
    grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]

    # Start with no "last seen" run ID for any tracked workflow.
    workflows_to_track = {name: None for name in WORKFLOWS_TO_TRACK}

    # How long to wait between scrapes (per reviewer suggestion this could be
    # promoted to a module-level SCRAPE_INTERVAL_SECONDS constant).
    scrape_interval_seconds = 5 * 60

    # Enter the main loop. Every five minutes we wake up and dump metrics for
    # the relevant jobs.
    while True:
        current_metrics = get_metrics(github_repo, workflows_to_track)
        if current_metrics:
            upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key)
            print(f"Uploaded {len(current_metrics)} metrics")

            # Remember the newest uploaded run for each workflow so the next
            # pass only reports runs we have not seen yet. Metrics are newest
            # first, so iterate in reverse to let the newest run ID win.
            for workflow_metric in reversed(current_metrics):
                workflows_to_track[workflow_metric.job_name] = workflow_metric.workflow_id
        else:
            print("No metrics found to upload.")

        # Sleep unconditionally: the original `continue` on the empty case
        # skipped the sleep and spun in a tight loop against the Github API.
        time.sleep(scrape_interval_seconds)
----------------
Keenuts wrote:
```suggestion
# Assuming we add a global SCRAPE_INTERVAL_SECONDS = 5 * 60
time.sleep(SCRAPE_INTERVAL_SECONDS)
```
https://github.com/llvm/llvm-project/pull/117461
More information about the llvm-commits
mailing list