[libcxx-commits] [libcxx] [llvm] [llvm][CI] Add metrics collection for libc++ premerge testing. (PR #152801)

Fri Aug 22 10:06:41 PDT 2025

================
@@ -81,6 +90,139 @@ class GaugeMetric:
     time_ns: int
 
 
+@dataclass
+class AggregateMetric:
+    aggregate_name: str
+    aggregate_queue_time: int
+    aggregate_run_time: int
+    aggregate_status: int
+    workflow_id: int
+
+
+def create_and_append_libcxx_aggregates(
+    workflow_metrics: list[JobMetrics]) -> list[JobMetrics,AggregateMetric]:
+    """
+    Find libc++ JobMetric entries and create aggregate metrics for them.
+
+    Sort the libc++ JobMetric entries by workflow id, and for each workflow
+    id group them by stages.  Create an aggregate metric for each stage for each
+    unique workflow id.  Append each aggregate metric to the workflow_metrics
+    list.
+
+    How aggregates are computed:
+    queue time: Time from when first job in group is created until last job in
+                group has started.
+    run time: Time from when first job in group starts running until last job
+              in group finishes running.
+    status: logical 'or' of all the job statuses in the group.
+    """
+    # Separate the jobs by workflow_id. Only look at JobMetrics entries.
+    aggregate_data = dict()
+    for job in workflow_metrics:
+        # Only want to look at JobMetrics
+        if not isinstance(job, JobMetrics):
+            continue
+        # Only want libc++ jobs.
+        if job.workflow_name != "Build and Test libc++":
+            continue
+        if job.workflow_id not in aggregate_data.keys():
+            aggregate_data[job.workflow_id] = [ job ]
+        else:
+            aggregate_data[job.workflow_id].append(job)
+
+    # Go through each aggregate_data list (workflow id) and find all the
+    # needed data
+    for ag_workflow_id in aggregate_data:
+        job_list = aggregate_data[ag_workflow_id]
+        stage1_jobs = list()
+        stage2_jobs = list()
+        stage3_jobs = list()
+        # sort jobs into stage1, stage2, & stage3.
+        for job in job_list:
+            if job.job_name.find('stage1') > 0:
+                stage1_jobs.append(job)
+            elif job.job_name.find('stage2') > 0:
+                stage2_jobs.append(job)
+            elif job.job_name.find('stage3') > 0:
+                stage3_jobs.append(job)
+
+        for job_list in [ stage1_jobs, stage2_jobs, stage3_jobs]:
+            if len(job_list) < 1:
+                  # No jobs in that stage this time around.
+                  continue
+
+            # Get the aggregate name.
+            ag_name = "github_libcxx_premerge_checks_"
+            if job_list[0].job_name.find('stage1') > 0:
----------------
boomanaiden154 wrote:

I was suggesting getting rid of the for loop. Instead of something like:
```python
for job_list in [ stage1_jobs, stage2_jobs, stage3_jobs]:
  ag_name = "github_libcxx_premerge_checks_"
  if job_list[0].job_name.find('stage1') > 0:
      ag_name = ag_name + "stage1_aggregate"
  elif job_list[0].job_name.find('stage2') > 0:
      ag_name = ag_name + "stage2_aggregate"
  elif job_list[0].job_name.find('stage3') > 0:
      ag_name = ag_name + "stage3_aggregate"
  else:
      ag_name = ag_name + "unknown_aggregate"
```

You could refactor everything under the loop into a function like `_construct_aggregate(ag_name: str, job_list: list[JobMetrics])`. Then the code looks like the following:
```python
_construct_aggregate(base_ag_name + "stage1_aggregate", stage1_jobs)
_construct_aggregate(base_ag_name + "stage2_aggregate", stage2_jobs)
_construct_aggregate(base_ag_name + "stage3_aggregate", stage3_jobs)
```

https://github.com/llvm/llvm-project/pull/152801