[libcxx-commits] [libcxx] [libc++] Improve the script to manage libc++ conformance issues (PR #172905)
Louis Dionne via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Dec 18 13:02:38 PST 2025
https://github.com/ldionne created https://github.com/llvm/llvm-project/pull/172905
The previous script was fairly inflexible. This patch refactors the script into a tool that can be used in various ways to manage the conformance-tracking bits of libc++. This should make it possible to synchronize the CSV status files, but also to find Github issues that aren't linked to the 'C++ Standards Conformance' project, to create missing issues more easily, etc.
From db19eba84af4ed3bdfa50fddcd3dcebda1e21272 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Wed, 17 Dec 2025 18:31:02 -0500
Subject: [PATCH] [libc++] Improve the script to manage libc++ conformance
issues
The previous script was fairly inflexible. This patch refactors the
script into a tool that can be used in various ways to manage the
conformance-tracking bits of libc++. This should make it possible to
synchronize the CSV status files, but also to find Github issues
that aren't linked to the 'C++ Standards Conformance' project, to
create missing issues more easily, etc.
---
libcxx/utils/conformance | 642 +++++++++++++++++++
libcxx/utils/requirements.txt | 1 +
libcxx/utils/synchronize_csv_status_files.py | 472 --------------
3 files changed, 643 insertions(+), 472 deletions(-)
create mode 100755 libcxx/utils/conformance
delete mode 100755 libcxx/utils/synchronize_csv_status_files.py
diff --git a/libcxx/utils/conformance b/libcxx/utils/conformance
new file mode 100755
index 0000000000000..eba923ddbcc74
--- /dev/null
+++ b/libcxx/utils/conformance
@@ -0,0 +1,642 @@
+#!/usr/bin/env python3
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+
+from typing import List, Dict, Tuple, IO, Optional
+import copy
+import csv
+import json
+import logging
+import re
+import subprocess
+
+import click
+
+# Number of the 'Libc++ Standards Conformance' project on Github
+# (passed to `gh project item-add` / `gh project item-list` and used in issue search queries).
+LIBCXX_CONFORMANCE_PROJECT = '31'
+
+# Project ID for 'Libc++ Standards Conformance'
+# (passed as `--project-id` to `gh project item-edit`).
+LIBCXX_CONFORMANCE_PROJECT_ID = 'PVT_kwDOAQWwKc4AlOgt'
+
+# Field ID for the 'Meeting Voted' field of the 'Libc++ Standards Conformance' project.
+# (passed as `--field-id` to `gh project item-edit` together with `--text`).
+LIBCXX_CONFORMANCE_MEETING_VOTED_ID = 'PVTF_lADOAQWwKc4AlOgtzgdUEXI'
+
+# Field ID for the 'Status' field of the 'Libc++ Standards Conformance' project.
+# (passed as `--field-id` to `gh project item-edit` together with `--single-select-option-id`).
+LIBCXX_CONFORMANCE_STATUS_ID = 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak'
+
+def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
+    """
+    Given a string containing special markers, extract everything located between these markers.
+
+    Only the first occurrence of the beginning marker is considered, and the extracted text stops
+    at the first end marker found after it. The markers themselves are not part of the result.
+
+    If the beginning marker is not found, None is returned. If the beginning marker is found but
+    there is no end marker, a ValueError is raised (this is done to avoid silently accepting inputs
+    that are erroneous by mistake).
+    """
+    start = text.find(begin_marker)
+    if start == -1:
+        return None
+
+    start += len(begin_marker) # skip the marker itself
+    end = text.find(end_marker, start)
+    if end == -1:
+        # Use the builtin ValueError: 'ArgumentError' is not a name defined in Python, so raising
+        # it would fail with a NameError and hide the actual problem.
+        raise ValueError(f"Could not find end marker {end_marker} in: {text[start:]}")
+
+    return text[start:end]
+
+class PaperStatus:
+    """
+    Implementation status of a paper or LWG issue, as found in a CSV status file or in the
+    Github conformance project.
+
+    Equality only compares the status itself, never the original CSV text it was parsed from.
+    Ordering (`<`) ranks statuses by how advanced they are: TODO, IN_PROGRESS, PARTIAL, and then
+    DONE and NOTHING_TO_DO which share the same rank (neither is "less than" the other, even
+    though they are not equal).
+    """
+    TODO = 1
+    IN_PROGRESS = 2
+    PARTIAL = 3
+    DONE = 4
+    NOTHING_TO_DO = 5
+
+    _status: int
+
+    _original: Optional[str]
+    """
+    Optional string from which the paper status was created. This is used to carry additional
+    information from CSV rows, like any notes associated to the status.
+    """
+
+    def __init__(self, status: int, original: Optional[str] = None):
+        self._status = status
+        self._original = original
+
+    def __eq__(self, other) -> bool:
+        # Let Python fall back to its default comparison for foreign types instead of failing
+        # with an AttributeError on `other._status`.
+        if not isinstance(other, PaperStatus):
+            return NotImplemented
+        return self._status == other._status
+
+    def __hash__(self) -> int:
+        # Defining __eq__ alone makes the class unhashable; keep the hash consistent with __eq__.
+        return hash(self._status)
+
+    def __lt__(self, other) -> bool:
+        if not isinstance(other, PaperStatus):
+            return NotImplemented
+        relative_order = {
+            PaperStatus.TODO: 0,
+            PaperStatus.IN_PROGRESS: 1,
+            PaperStatus.PARTIAL: 2,
+            PaperStatus.DONE: 3,
+            PaperStatus.NOTHING_TO_DO: 3,
+        }
+        return relative_order[self._status] < relative_order[other._status]
+
+    @staticmethod
+    def from_csv_entry(entry: str):
+        """
+        Parse a paper status out of a CSV row entry. Entries can look like:
+        - '' (an empty string, which means the paper is not done yet)
+        - '|In Progress|'
+        - '|Partial|'
+        - '|Complete|'
+        - '|Nothing To Do|'
+
+        Any other entry raises a RuntimeError. The entry is remembered as the original text of
+        the returned status.
+        """
+        mapping = {
+            '': PaperStatus.TODO,
+            '|In Progress|': PaperStatus.IN_PROGRESS,
+            '|Partial|': PaperStatus.PARTIAL,
+            '|Complete|': PaperStatus.DONE,
+            '|Nothing To Do|': PaperStatus.NOTHING_TO_DO,
+        }
+        if entry not in mapping:
+            raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
+        return PaperStatus(mapping[entry], entry)
+
+    @staticmethod
+    def from_github_issue(issue: Dict):
+        """
+        Parse a paper status out of a Github issue obtained from querying a Github project.
+
+        An issue without a 'status' key is considered TODO. An unknown status raises a RuntimeError.
+        """
+        if 'status' not in issue:
+            return PaperStatus(PaperStatus.TODO)
+
+        mapping = {
+            'Todo': PaperStatus.TODO,
+            'In Progress': PaperStatus.IN_PROGRESS,
+            'Partial': PaperStatus.PARTIAL,
+            'Done': PaperStatus.DONE,
+            'Nothing To Do': PaperStatus.NOTHING_TO_DO,
+        }
+        if issue['status'] not in mapping:
+            raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
+        return PaperStatus(mapping[issue['status']])
+
+    def to_csv_entry(self) -> str:
+        """
+        Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
+        '|In Progress|', etc.
+
+        When the status was created from some original text, that text is returned verbatim.
+        """
+        mapping = {
+            PaperStatus.TODO: '',
+            PaperStatus.IN_PROGRESS: '|In Progress|',
+            PaperStatus.PARTIAL: '|Partial|',
+            PaperStatus.DONE: '|Complete|',
+            PaperStatus.NOTHING_TO_DO: '|Nothing To Do|',
+        }
+        return self._original if self._original is not None else mapping[self._status]
+
+    def to_project_single_select_option(self) -> str:
+        """
+        Maps this Status to a single-select option in the libc++ Standards Conformance Github project.
+        """
+        # Values for the various options of the 'Status' field in the libc++ Standards Conformance project.
+        mapping = {
+            PaperStatus.TODO: 'f75ad846',
+            PaperStatus.IN_PROGRESS: '47fc9ee4',
+            PaperStatus.PARTIAL: '44a9df65',
+            PaperStatus.DONE: '98236657',
+            PaperStatus.NOTHING_TO_DO: '18422253',
+        }
+        return mapping[self._status]
+
+class PaperInfo:
+    """
+    Everything known about a single paper or LWG issue, whether it was read from a row of a CSV
+    status file or from an issue of the Github conformance project.
+    """
+    paper_number: str
+    """
+    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
+    """
+
+    paper_name: str
+    """
+    Plain text string representing the name of the paper.
+    """
+
+    status: PaperStatus
+    """
+    Status of the paper/issue. This can be to do, in progress, partial, done, or nothing to do.
+    """
+
+    meeting: Optional[str]
+    """
+    Plain text string representing the meeting at which the paper/issue was voted.
+    """
+
+    first_released_version: Optional[str]
+    """
+    First version of LLVM in which this paper/issue was resolved.
+    """
+
+    github_issue: Optional[str]
+    """
+    Optional number of the Github issue tracking the implementation status of this paper.
+    This is used to cross-reference rows in the status pages with Github issues.
+    """
+
+    notes: Optional[str]
+    """
+    Optional plain text string representing notes to associate to the paper.
+    This is used to populate the "Notes" column in the CSV status pages.
+    """
+
+    original: Optional[object]
+    """
+    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
+    was used to generate this PaperInfo and is useful for error reporting purposes.
+    """
+
+    def __init__(self, paper_number: str, paper_name: str,
+                       status: PaperStatus,
+                       meeting: Optional[str] = None,
+                       first_released_version: Optional[str] = None,
+                       github_issue: Optional[str] = None,
+                       notes: Optional[str] = None,
+                       original: Optional[object] = None):
+        self.paper_number = paper_number
+        self.paper_name = paper_name
+        self.status = status
+        self.meeting = meeting
+        self.first_released_version = first_released_version
+        self.github_issue = github_issue
+        self.notes = notes
+        self.original = original
+
+    def for_printing(self) -> Tuple[str, str, str, str, str, str, str]:
+        """
+        Return this paper as a 7-column CSV row: paper link, name, meeting, status, first released
+        version, Github issue link and notes. Missing optional fields become empty strings.
+        """
+        return (
+            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
+            self.paper_name,
+            self.meeting if self.meeting is not None else '',
+            self.status.to_csv_entry(),
+            self.first_released_version if self.first_released_version is not None else '',
+            f'`#{self.github_issue} <https://github.com/llvm/llvm-project/issues/{self.github_issue}>`__' if self.github_issue is not None else '',
+            self.notes if self.notes is not None else '',
+        )
+
+    def __repr__(self) -> str:
+        # Prefer showing the object we were created from, since this is mostly used in diagnostics.
+        return repr(self.original) if self.original is not None else repr(self.for_printing())
+
+    @staticmethod
+    def from_csv_row(row: Tuple[str, str, str, str, str, str, str]):# -> PaperInfo:
+        """
+        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.
+
+        The row must have the 7 columns produced by `for_printing`. A RuntimeError is raised if the
+        row is too short, if no paper number can be found in the first column or if the status
+        column cannot be parsed.
+        """
+        # All 7 columns are read below; report short rows explicitly instead of with a bare IndexError.
+        if len(row) < 7:
+            raise RuntimeError(f"Expected 7 columns in CSV row but found {len(row)}: {row}")
+
+        # Extract the paper number from the first column
+        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
+        if match is None:
+            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")
+
+        # Match the issue number if present
+        issue_match = re.search(r'#([0-9]+)', row[5])
+        github_issue = issue_match.group(1) if issue_match else None
+
+        try:
+            status = PaperStatus.from_csv_entry(row[3])
+        except Exception:
+            # Add the offending row to the log for context, then let the error propagate.
+            logging.error(f'Could not extract a status from the provided CSV row: {row}')
+            raise
+
+        return PaperInfo(
+            paper_number=match.group(1),
+            paper_name=row[1],
+            status=status,
+            meeting=row[2] or None,
+            first_released_version=row[4] or None,
+            github_issue=github_issue,
+            notes=row[6] or None,
+            original=row,
+        )
+
+    @staticmethod
+    def from_github_issue(issue: Dict):# -> PaperInfo:
+        """
+        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.
+
+        The issue title must contain the paper number followed by a colon, otherwise a RuntimeError
+        is raised. Notes are taken from the text located between the BEGIN-RST-NOTES and
+        END-RST-NOTES markers of the issue body, if any.
+        """
+        # Extract the paper number from the issue title
+        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
+        if match is None:
+            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
+        paper = match.group(1)
+
+        # Extract any notes from the Github issue and populate the RST notes with them
+        issue_description = issue['content']['body']
+        notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
+        notes = notes.strip() if notes is not None else notes
+
+        return PaperInfo(
+            paper_number=paper,
+            paper_name=issue['title'].removeprefix(paper + ': '),
+            status=PaperStatus.from_github_issue(issue),
+            # NOTE(review): the key is spelled 'meeting Voted' (lowercase 'm'), presumably matching
+            # how the project's 'Meeting Voted' field is named in the JSON -- confirm.
+            meeting=issue.get('meeting Voted', None),
+            first_released_version=None, # TODO
+            github_issue=str(issue['content']['number']),
+            notes=notes,
+            original=issue,
+        )
+
+def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
+    """
+    Combine the PaperInfo of a CSV row with the PaperInfo of the Github issue tracking it.
+
+    The result is a copy of one of the two inputs:
+    - When the Github issue is strictly more advanced than the CSV row, the Github version is
+      returned as-is. The one exception is a 'To Do' row whose issue is merely 'In Progress':
+      the row is kept, since that transition only creates churn and 'In Progress' is not useful
+      to document user-facing functionality in releases.
+    - Otherwise the CSV row is kept, but its notes and its Github issue number are replaced by the
+      ones coming from the Github issue. If the two statuses differ in that case, the mismatch
+      is logged because something must be wrong.
+    """
+    todo_to_in_progress = (paper.status == PaperStatus(PaperStatus.TODO)
+                           and gh.status == PaperStatus(PaperStatus.IN_PROGRESS))
+
+    if not todo_to_in_progress:
+        if paper.status < gh.status:
+            # Github is ahead of the CSV row: take the Github version in full.
+            return copy.deepcopy(gh)
+        if paper.status != gh.status:
+            logging.info(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
+
+    # We are keeping the CSV row: still refresh the issue link and the notes from Github.
+    merged = copy.deepcopy(paper)
+    merged.github_issue = gh.github_issue
+    merged.notes = gh.notes
+    return merged
+
+def load_csv(file: IO) -> List[Tuple]:
+    """Read every row of the comma-separated file into a list."""
+    return list(csv.reader(file, delimiter=','))
+
+def write_csv(output: IO, rows: List[Tuple]):
+    """Write the rows to the output as CSV, quoting every field and ending lines with '\\n'."""
+    csv.writer(output, quoting=csv.QUOTE_ALL, lineterminator='\n').writerows(rows)
+
+def link_github_issue(issue: str, meeting_voted: Optional[str], status: Optional[PaperStatus]) -> None:
+    """
+    Add an existing Github issue to the libc++ Conformance project using the gh command-line tool,
+    then set the 'Meeting Voted' and 'Status' fields of the new project item when provided.
+    """
+    # Resolve whatever the user gave us (number, URL, ...) to the URL of the issue.
+    issue_url = subprocess.check_output(
+        ['gh', 'issue', 'view', issue, '--json', 'url', '--jq', '.url']
+    ).decode().strip()
+
+    # Adding the issue to the project gives us back the ID of the resulting project item.
+    item_id = subprocess.check_output(
+        ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm',
+         '--url', issue_url, '--format', 'json', '--jq', '.id']
+    ).decode().strip()
+
+    # Both field updates below edit the same item of the same project.
+    edit_item = ['gh', 'project', 'item-edit', '--id', item_id,
+                 '--project-id', LIBCXX_CONFORMANCE_PROJECT_ID]
+
+    if meeting_voted is not None:
+        subprocess.check_call(edit_item + ['--field-id', LIBCXX_CONFORMANCE_MEETING_VOTED_ID,
+                                           '--text', meeting_voted])
+
+    if status is not None:
+        option_id = status.to_project_single_select_option()
+        subprocess.check_call(edit_item + ['--field-id', LIBCXX_CONFORMANCE_STATUS_ID,
+                                           '--single-select-option-id', option_id])
+
+def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
+    """
+    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
+    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
+    new CSV rows synchronized with the up-to-date information.
+
+    The first row is the header and is preserved, as are separator rows (rows that are empty or whose
+    first column is empty). An empty list of rows yields an empty result.
+
+    A row is only merged with Github when exactly one Github paper has the same paper number and, if
+    the row already links to an issue, that link matches. In every other case a warning is logged and
+    the row is kept unchanged. See `merge` for how a row and its Github issue are combined.
+    """
+    if not rows:
+        return [] # Not even a header: nothing to synchronize
+
+    # Group the Github papers by paper number once, instead of scanning the whole list for every row.
+    tracking_by_number: Dict[str, List[PaperInfo]] = {}
+    for gh in from_github:
+        tracking_by_number.setdefault(gh.paper_number, []).append(gh)
+
+    results = [rows[0]] # Start with the header
+    for row in rows[1:]: # Skip the header
+        # If the row contains empty entries, this is a "separator row" between meetings.
+        # Preserve it as-is. Blank lines are parsed as rows without any column at all.
+        if not row or row[0] == "":
+            results.append(row)
+            continue
+
+        paper = PaperInfo.from_csv_row(row)
+
+        # Find any Github issues tracking this paper. Each row must have one and exactly one Github
+        # issue tracking it, which we validate below.
+        tracking = tracking_by_number.get(paper.paper_number, [])
+
+        # If there's more than one tracking issue, something is weird.
+        if len(tracking) > 1:
+            logging.warning(f"Found a row with more than one tracking issue: {row}")
+            for t in tracking:
+                logging.warning(f'  tracked by: {t}')
+            results.append(row)
+            continue
+
+        # Validate the Github issue associated to the CSV row, if any
+        if paper.github_issue is not None:
+            if len(tracking) == 0:
+                logging.warning(f"Found row claiming to have a tracking issue, but failed to find a tracking issue on Github: {row}")
+                results.append(row)
+                continue
+            if paper.github_issue != tracking[0].github_issue:
+                logging.warning(f"Found row with incorrect tracking issue: {row}\ntracked by: {tracking[0]}")
+                results.append(row)
+                continue
+
+        # If there is no tracking issue for that row, log that we're missing an issue.
+        if len(tracking) == 0:
+            logging.warning(f"Can't find any Github issue for CSV row: {row}")
+            results.append(row)
+            continue
+
+        results.append(merge(paper, tracking[0]).for_printing())
+
+    return results
+
+class ConformanceProject:
+    """
+    The items of the libc++ conformance project on Github, kept as the JSON produced by the gh
+    command-line tool.
+    """
+    def __init__(self, _json):
+        self._json = _json
+
+    @staticmethod
+    def query(): # -> ConformanceProject
+        """Obtain the project items by running `gh project item-list`."""
+        raw = subprocess.check_output([
+            'gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT,
+            '--owner', 'llvm', '--format', 'json', '--limit', '9999999',
+        ])
+        return ConformanceProject(json.loads(raw))
+
+    @staticmethod
+    def load_from(f: IO): # -> ConformanceProject
+        """Load project items previously written with `save`."""
+        return ConformanceProject(json.load(f))
+
+    def save(self, output: IO) -> None:
+        """Write the project items to the output as JSON."""
+        json.dump(self._json, output)
+
+    def papers(self) -> List[PaperInfo]:
+        """
+        Return the list of papers tracked by the conformance project.
+        """
+        tracked = []
+        for item in self._json['items']:
+            tracked.append(PaperInfo.from_github_issue(item))
+        return tracked
+
+    def tracks(self, paper_number: str) -> bool:
+        """
+        Return whether the conformance project is tracking the given paper number.
+        """
+        return any(candidate.paper_number == paper_number for candidate in self.papers())
+
+
+@click.group()
+def main():
+    """Manipulate libc++'s various source of truth for tracking conformance."""
+    pass
+
+
+@main.group('github')
+def github_cmd():
+    """Perform actions on Github issues that track libc++ conformance."""
+    pass
+
+
+@github_cmd.command('find-unlinked')
+@click.option('--labels', multiple=True, default=('lwg-issue', 'wg21-paper'),
+              help='The label(s) to search for. This can be lwg-issue or wg21-paper. '
+                   'Passing this flag multiple times will OR the provided labels.')
+def github_find_unlinked(labels):
+    """Find issues not linked to the Conformance project."""
+    # The labels are joined with commas into a single `label:` qualifier so that the search
+    # ORs them, as promised by the help text of --labels.
+    query = f'is:issue -project:llvm/{LIBCXX_CONFORMANCE_PROJECT} label:libc++ label:{",".join(labels)}'
+    cli = ['gh', 'issue', 'list', '--search', query, '--limit', '1000']
+    # The output of gh is not captured: the list of issues goes straight to the terminal.
+    subprocess.check_call(cli)
+
+
+@github_cmd.command('link')
+@click.argument('issues', nargs=-1)
+@click.option('--meeting-voted', required=False,
+              help='The optional meeting at which these issues have been voted. If provided, this is used to set the '
+                   'Meeting Voted field in the Conformance project.')
+@click.option('--status', required=False, type=click.Choice(['todo', 'inprogress', 'partial', 'done', 'nothingtodo'],
+                                                            case_sensitive=False),
+              help='The optional status to set the issue to in the Conformance project.')
+def github_link(issues, meeting_voted, status):
+    """Link issues to the Conformance project.
+
+    ISSUES are the issues to link to the Conformance project. This can be anything that the gh
+    command-line tool understands, such as an issue number or a URL.
+    """
+    # Translate the command-line spelling of the status into the corresponding PaperStatus.
+    mapping = {'todo': PaperStatus.TODO, 'inprogress': PaperStatus.IN_PROGRESS, 'partial': PaperStatus.PARTIAL,
+               'done': PaperStatus.DONE, 'nothingtodo': PaperStatus.NOTHING_TO_DO}
+    if status is not None:
+        # The choice is case-insensitive, so normalize it before the lookup.
+        status = PaperStatus(mapping[status.lower()])
+    for issue in issues:
+        link_github_issue(issue, meeting_voted, status)
+
+@github_cmd.command('create')
+@click.argument('csvfiles', nargs=-1, type=click.File('r'))
+@click.option('--load-github-from', type=click.File('r'), required=False,
+              help='Optionally load the Github Conformance tracking data from the given file. Useful to avoid '
+                   'running into API call limits when testing locally. See `conformance github download`.')
+@click.option('--labels', type=str, multiple=True,
+              help='Optional labels to add to the issue being created. Either lwg-issue or wg21-paper must be provided, '
+                   'but both are mutually exclusive.')
+def github_create(csvfiles, load_github_from, labels):
+    """Create Github issues tracking libc++ conformance from CSV status files."""
+    if 'lwg-issue' not in labels and 'wg21-paper' not in labels:
+        raise click.BadParameter('Either lwg-issue or wg21-paper label should be provided', param_hint='--labels')
+    # The help text documents the two labels as mutually exclusive: enforce it.
+    if 'lwg-issue' in labels and 'wg21-paper' in labels:
+        raise click.BadParameter('The lwg-issue and wg21-paper labels are mutually exclusive', param_hint='--labels')
+
+    if load_github_from is None:
+        logging.info("Loading conformance information from Github")
+        project = ConformanceProject.query()
+    else:
+        logging.info(f"Loading conformance information from {load_github_from.name}")
+        project = ConformanceProject.load_from(load_github_from)
+
+    # Collect the tracked paper numbers once, instead of re-parsing every issue of the project
+    # for each CSV row.
+    tracked_papers = {tracked.paper_number for tracked in project.papers()}
+
+    def all_csv_rows():
+        for file in csvfiles:
+            rows = load_csv(file)
+            for row in rows[1:]: # Skip the header
+                # If the row contains empty entries, this is a "separator row" between meetings.
+                # Blank lines are parsed as rows without any column at all.
+                if not row or row[0] == "":
+                    continue
+                yield row
+
+    for row in all_csv_rows():
+        paper = PaperInfo.from_csv_row(row)
+
+        # Only consider creating a Github issue for papers that are not already tracked.
+        if paper.paper_number in tracked_papers:
+            continue
+
+        # If the row pretends to be tracked by a Github issue but we can't find it, something is weird.
+        if paper.github_issue is not None:
+            logging.warning(f"Found row claiming to have a tracking issue, but failed to find a tracking issue on Github: {row}")
+            continue
+
+        # Create the actual Github issue. The paper name comes from an RST document, so turn the
+        # double backticks into single ones and drop the backslashes for the issue title.
+        paper_name = paper.paper_name.replace('``', '`').replace('\\', '')
+        cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
+               '--title', f'{paper.paper_number}: {paper_name}',
+               '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
+               '--project', 'libc++ Standards Conformance',
+               '--label', 'libc++']
+
+        for label in labels:
+            cli += ['--label', label]
+
+        # Never create an issue without an explicit confirmation from the user.
+        print("Do you want to create the following issue?")
+        print(cli)
+        answer = input("y/n: ")
+        if answer == 'n':
+            print("Not creating issue")
+            continue
+        elif answer != 'y':
+            print(f"Invalid answer {answer}, skipping")
+            continue
+
+        logging.info("Creating issue")
+        issue_link = subprocess.check_output(cli).decode().strip()
+        logging.info(f"Created tracking issue for {paper.paper_number}: {issue_link}")
+
+        logging.info(f"Linking {issue_link} to the libc++ Standards Conformance project")
+        link_github_issue(issue_link, paper.meeting, paper.status)
+
+
+@github_cmd.command('download')
+@click.option('--output', '-o', type=click.File('w'), default='-',
+              help='Save the data to the provided file. Default is stdout.')
+def github_download(output):
+    """Download libc++ conformance tracking data from Github issues and save it locally."""
+    # The saved file can later be given to the --load-github-from option of the other commands.
+    project = ConformanceProject.query()
+    project.save(output)
+
+
+@main.group('csv')
+def csv_cmd():
+    """Perform actions on the CSV files that track libc++ conformance."""
+    pass
+
+
+@csv_cmd.command('validate')
+@click.argument('files', nargs=-1, type=click.File('r'))
+def csv_validate(files):
+    """Validate the format of the provided CSV files."""
+    for file in files:
+        rows = load_csv(file)
+        for row in rows[1:]: # Skip the header
+            # Skip separator rows. Blank lines are parsed as rows without any column at all.
+            if row and row[0] != "":
+                # Parsing raises if the row is malformed, which is all the validation we need.
+                PaperInfo.from_csv_row(row)
+
+
+@csv_cmd.command('synchronize')
+@click.argument('files', nargs=-1, type=click.File('r'))
+@click.option('--output', '-o', type=click.File('w'), default='-',
+              help='Write the generated CSV to the specified file. Defaults to stdout.')
+@click.option('--load-github-from', type=click.File('r'), required=False,
+              help='Optionally load the Github Conformance tracking data from the given file. Useful to avoid '
+                   'running into API call limits when testing locally. See `conformance github download`.')
+def csv_synchronize(files, output, load_github_from):
+    """Synchronize existing CSV status files based on existing Github issues."""
+    if load_github_from is None:
+        logging.info("Loading conformance information from Github")
+        project = ConformanceProject.query()
+    else:
+        logging.info(f"Loading conformance information from {load_github_from.name}")
+        project = ConformanceProject.load_from(load_github_from)
+
+    # Parse the Github issues once: the result is the same for every file.
+    papers = project.papers()
+
+    # Note that the synchronized rows of all the files are written to the same output, one
+    # file after the other.
+    for file in files:
+        logging.info(f"Synchronizing {file.name} with Github issues")
+        rows = load_csv(file)
+        synced = sync_csv(rows, papers)
+        write_csv(output, synced)
+
+
+@csv_cmd.command('create')
+@click.argument('issues', nargs=-1)
+@click.option('--output', '-o', type=click.File('w'), default='-',
+              help='Write the generated CSV to the specified file. Defaults to stdout.')
+@click.option('--load-github-from', type=click.File('r'), required=False,
+              help='Optionally load the Github Conformance tracking data from the given file. Useful to avoid '
+                   'running into API call limits when testing locally. See `conformance github download`.')
+def csv_create(issues, output, load_github_from):
+    """Produce CSV entries corresponding to the given Github issues."""
+    if load_github_from is None:
+        logging.info("Loading conformance information from Github")
+        project = ConformanceProject.query()
+    else:
+        logging.info(f"Loading conformance information from {load_github_from.name}")
+        project = ConformanceProject.load_from(load_github_from)
+
+    # Parse the Github issues once and index the papers by issue number. If several papers share
+    # an issue number, the first one wins.
+    papers_by_issue = {}
+    for paper in project.papers():
+        papers_by_issue.setdefault(paper.github_issue, paper)
+
+    rows = []
+    for issue in issues:
+        # Figure out the issue number
+        number = subprocess.check_output(['gh', 'issue', 'view', issue, '--json', 'number', '--jq', '.number']).decode().strip()
+
+        # Find the PaperInfo corresponding to that issue. If there is none, the issue provided by the user is invalid.
+        paper = papers_by_issue.get(number)
+        if paper is None:
+            raise RuntimeError(f'Issue {issue} does not have a corresponding tracking issue in Github, aborting')
+        rows.append(paper.for_printing())
+
+    write_csv(output, rows)
+
+
+# Only run the command-line interface when this file is executed as a script.
+if __name__ == '__main__':
+ main()
diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt
index 1ec769c8693dc..2f97be84e1024 100644
--- a/libcxx/utils/requirements.txt
+++ b/libcxx/utils/requirements.txt
@@ -1,3 +1,4 @@
+click
GitPython
numpy
pandas
diff --git a/libcxx/utils/synchronize_csv_status_files.py b/libcxx/utils/synchronize_csv_status_files.py
deleted file mode 100755
index fd670ed265dcc..0000000000000
--- a/libcxx/utils/synchronize_csv_status_files.py
+++ /dev/null
@@ -1,472 +0,0 @@
-#!/usr/bin/env python3
-# ===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ===----------------------------------------------------------------------===##
-
-from typing import List, Dict, Tuple, Optional
-import copy
-import csv
-import itertools
-import json
-import os
-import pathlib
-import re
-import subprocess
-
-# Number of the 'Libc++ Standards Conformance' project on Github
-LIBCXX_CONFORMANCE_PROJECT = '31'
-
-def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
- """
- Given a string containing special markers, extract everything located beetwen these markers.
-
- If the beginning marker is not found, None is returned. If the beginning marker is found but
- there is no end marker, it is an error (this is done to avoid silently accepting inputs that
- are erroneous by mistake).
- """
- start = text.find(begin_marker)
- if start == -1:
- return None
-
- start += len(begin_marker) # skip the marker itself
- end = text.find(end_marker, start)
- if end == -1:
- raise ArgumentError(f"Could not find end marker {end_marker} in: {text[start:]}")
-
- return text[start:end]
-
-class PaperStatus:
- TODO = 1
- IN_PROGRESS = 2
- PARTIAL = 3
- DONE = 4
- NOTHING_TO_DO = 5
-
- _status: int
-
- _original: Optional[str]
- """
- Optional string from which the paper status was created. This is used to carry additional
- information from CSV rows, like any notes associated to the status.
- """
-
- def __init__(self, status: int, original: Optional[str] = None):
- self._status = status
- self._original = original
-
- def __eq__(self, other) -> bool:
- return self._status == other._status
-
- def __lt__(self, other) -> bool:
- relative_order = {
- PaperStatus.TODO: 0,
- PaperStatus.IN_PROGRESS: 1,
- PaperStatus.PARTIAL: 2,
- PaperStatus.DONE: 3,
- PaperStatus.NOTHING_TO_DO: 3,
- }
- return relative_order[self._status] < relative_order[other._status]
-
- @staticmethod
- def from_csv_entry(entry: str):
- """
- Parse a paper status out of a CSV row entry. Entries can look like:
- - '' (an empty string, which means the paper is not done yet)
- - '|In Progress|'
- - '|Partial|'
- - '|Complete|'
- - '|Nothing To Do|'
- """
- if entry == '':
- return PaperStatus(PaperStatus.TODO, entry)
- elif entry == '|In Progress|':
- return PaperStatus(PaperStatus.IN_PROGRESS, entry)
- elif entry == '|Partial|':
- return PaperStatus(PaperStatus.PARTIAL, entry)
- elif entry == '|Complete|':
- return PaperStatus(PaperStatus.DONE, entry)
- elif entry == '|Nothing To Do|':
- return PaperStatus(PaperStatus.NOTHING_TO_DO, entry)
- else:
- raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
-
- @staticmethod
- def from_github_issue(issue: Dict):
- """
- Parse a paper status out of a Github issue obtained from querying a Github project.
- """
- if 'status' not in issue:
- return PaperStatus(PaperStatus.TODO)
- elif issue['status'] == 'Todo':
- return PaperStatus(PaperStatus.TODO)
- elif issue['status'] == 'In Progress':
- return PaperStatus(PaperStatus.IN_PROGRESS)
- elif issue['status'] == 'Partial':
- return PaperStatus(PaperStatus.PARTIAL)
- elif issue['status'] == 'Done':
- return PaperStatus(PaperStatus.DONE)
- elif issue['status'] == 'Nothing To Do':
- return PaperStatus(PaperStatus.NOTHING_TO_DO)
- else:
- raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
-
- def to_csv_entry(self) -> str:
- """
- Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
- '|In Progress|', etc.
- """
- mapping = {
- PaperStatus.TODO: '',
- PaperStatus.IN_PROGRESS: '|In Progress|',
- PaperStatus.PARTIAL: '|Partial|',
- PaperStatus.DONE: '|Complete|',
- PaperStatus.NOTHING_TO_DO: '|Nothing To Do|',
- }
- return self._original if self._original is not None else mapping[self._status]
-
-class PaperInfo:
- paper_number: str
- """
- Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
- """
-
- paper_name: str
- """
- Plain text string representing the name of the paper.
- """
-
- status: PaperStatus
- """
- Status of the paper/issue. This can be complete, in progress, partial, or done.
- """
-
- meeting: Optional[str]
- """
- Plain text string representing the meeting at which the paper/issue was voted.
- """
-
- first_released_version: Optional[str]
- """
- First version of LLVM in which this paper/issue was resolved.
- """
-
- github_issue: Optional[str]
- """
- Optional number of the Github issue tracking the implementation status of this paper.
- This is used to cross-reference rows in the status pages with Github issues.
- """
-
- notes: Optional[str]
- """
- Optional plain text string representing notes to associate to the paper.
- This is used to populate the "Notes" column in the CSV status pages.
- """
-
- original: Optional[object]
- """
- Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
- was used to generate this PaperInfo and is useful for error reporting purposes.
- """
-
- def __init__(self, paper_number: str, paper_name: str,
- status: PaperStatus,
- meeting: Optional[str] = None,
- first_released_version: Optional[str] = None,
- github_issue: Optional[str] = None,
- notes: Optional[str] = None,
- original: Optional[object] = None):
- self.paper_number = paper_number
- self.paper_name = paper_name
- self.status = status
- self.meeting = meeting
- self.first_released_version = first_released_version
- self.github_issue = github_issue
- self.notes = notes
- self.original = original
-
- def for_printing(self) -> Tuple[str, str, str, str, str, str, str]:
- return (
- f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
- self.paper_name,
- self.meeting if self.meeting is not None else '',
- self.status.to_csv_entry(),
- self.first_released_version if self.first_released_version is not None else '',
- f'`#{self.github_issue} <https://github.com/llvm/llvm-project/issues/{self.github_issue}>`__' if self.github_issue is not None else '',
- self.notes if self.notes is not None else '',
- )
-
- def __repr__(self) -> str:
- return repr(self.original) if self.original is not None else repr(self.for_printing())
-
- @staticmethod
- def from_csv_row(row: Tuple[str, str, str, str, str, str]):# -> PaperInfo:
- """
- Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.
- """
- # Extract the paper number from the first column
- match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
- if match is None:
- raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")
-
- # Match the issue number if present
- github_issue = re.search(r'#([0-9]+)', row[5])
- if github_issue:
- github_issue = github_issue.group(1)
-
- return PaperInfo(
- paper_number=match.group(1),
- paper_name=row[1],
- status=PaperStatus.from_csv_entry(row[3]),
- meeting=row[2] or None,
- first_released_version=row[4] or None,
- github_issue=github_issue,
- notes=row[6] or None,
- original=row,
- )
-
- @staticmethod
- def from_github_issue(issue: Dict):# -> PaperInfo:
- """
- Create a PaperInfo object from the Github issue information obtained from querying a Github Project.
- """
- # Extract the paper number from the issue title
- match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
- if match is None:
- raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
- paper = match.group(1)
-
- # Extract any notes from the Github issue and populate the RST notes with them
- issue_description = issue['content']['body']
- notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
- notes = notes.strip() if notes is not None else notes
-
- return PaperInfo(
- paper_number=paper,
- paper_name=issue['title'].removeprefix(paper + ': '),
- status=PaperStatus.from_github_issue(issue),
- meeting=issue.get('meeting Voted', None),
- first_released_version=None, # TODO
- github_issue=str(issue['content']['number']),
- notes=notes,
- original=issue,
- )
-
-def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
- """
- Merge a paper coming from a CSV row with a corresponding Github-tracked paper.
-
- If the CSV row has a status that is "less advanced" than the Github issue, simply update the CSV
- row with the newer status. Otherwise, report an error if they have a different status because
- something must be wrong.
-
- We don't update issues from 'To Do' to 'In Progress', since that only creates churn and the
- status files aim to document user-facing functionality in releases, for which 'In Progress'
- is not useful.
-
- In case we don't update the CSV row's status, we still take any updated notes coming
- from the Github issue and we add a link to the Github issue if it was previously missing.
- """
- took_gh_in_full = False # Whether we updated the entire PaperInfo from the Github version
- if paper.status == PaperStatus(PaperStatus.TODO) and gh.status == PaperStatus(PaperStatus.IN_PROGRESS):
- result = copy.deepcopy(paper)
- elif paper.status < gh.status:
- result = copy.deepcopy(gh)
- took_gh_in_full = True
- elif paper.status == gh.status:
- result = copy.deepcopy(paper)
- else:
- print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
- result = copy.deepcopy(paper)
-
- # If we didn't take the Github issue in full, make sure to update the notes, the link and anything else.
- if not took_gh_in_full:
- result.github_issue = gh.github_issue
- result.notes = gh.notes
- return result
-
-def load_csv(file: pathlib.Path) -> List[Tuple]:
- rows = []
- with open(file, newline='', encoding='utf-8') as f:
- reader = csv.reader(f, delimiter=',')
- for row in reader:
- rows.append(row)
- return rows
-
-def write_csv(output: pathlib.Path, rows: List[Tuple]):
- with open(output, 'w', newline='', encoding='utf-8') as f:
- writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
- for row in rows:
- writer.writerow(row)
-
-def create_github_issue(paper: PaperInfo, labels: List[str]) -> None:
- """
- Create a new Github issue representing the given PaperInfo.
- """
- assert paper.github_issue is None, "Trying to create a Github issue for a paper that is already tracked"
-
- paper_name = paper.paper_name.replace('``', '`').replace('\\', '')
-
- create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
- '--title', f'{paper.paper_number}: {paper_name}',
- '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
- '--project', 'libc++ Standards Conformance',
- '--label', 'libc++']
-
- for label in labels:
- create_cli += ['--label', label]
-
- print("Do you want to create the following issue?")
- print(create_cli)
- answer = input("y/n: ")
- if answer == 'n':
- print("Not creating issue")
- return
- elif answer != 'y':
- print(f"Invalid answer {answer}, skipping")
- return
-
- print("Creating issue")
- issue_link = subprocess.check_output(create_cli).decode().strip()
- print(f"Created tracking issue for {paper.paper_number}: {issue_link}")
-
- # Retrieve the "Github project item ID" by re-adding the issue to the project again,
- # even though we created it inside the project in the first place.
- item_add_cli = ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--url', issue_link, '--format', 'json']
- item = json.loads(subprocess.check_output(item_add_cli).decode().strip())
-
- # Then, adjust the 'Meeting Voted' field of that item.
- meeting_voted_cli = ['gh', 'project', 'item-edit',
- '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
- '--field-id', 'PVTF_lADOAQWwKc4AlOgtzgdUEXI', '--text', paper.meeting,
- '--id', item['id']]
- subprocess.check_call(meeting_voted_cli)
-
- # And also adjust the 'Status' field of the item to 'To Do'.
- status_cli = ['gh', 'project', 'item-edit',
- '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
- '--field-id', 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak', '--single-select-option-id', 'f75ad846',
- '--id', item['id']]
- subprocess.check_call(status_cli)
-
-def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, labels: List[str] = None) -> List[Tuple]:
- """
- Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
- up-to-date (but potentially incomplete) tracking information from Github, this function returns the
- new CSV rows synchronized with the up-to-date information.
-
- If `create_new` is True and a paper from the CSV file is not tracked on Github yet, this also prompts
- to create a new issue on Github for tracking it. In that case the created issue is tagged with the
- provided labels.
-
- Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
- PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
- it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).
- """
- results = [rows[0]] # Start with the header
- for row in rows[1:]: # Skip the header
- # If the row contains empty entries, this is a "separator row" between meetings.
- # Preserve it as-is.
- if row[0] == "":
- results.append(row)
- continue
-
- paper = PaperInfo.from_csv_row(row)
-
- # Find any Github issues tracking this paper. Each row must have one and exactly one Github
- # issue tracking it, which we validate below.
- tracking = [gh for gh in from_github if paper.paper_number == gh.paper_number]
-
- # If there's more than one tracking issue, something is weird.
- if len(tracking) > 1:
- print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
- results.append(row)
- continue
-
- # Validate the Github issue associated to the CSV row, if any
- if paper.github_issue is not None:
- if len(tracking) == 0:
- print(f"Found row claiming to have a tracking issue, but failed to find a tracking issue on Github: {row}")
- results.append(row)
- continue
- if len(tracking) == 1 and paper.github_issue != tracking[0].github_issue:
- print(f"Found row with incorrect tracking issue: {row}\ntracked by: {tracking[0]}")
- results.append(row)
- continue
-
- # If there is no tracking issue for that row and we are creating new issues, do that.
- # Otherwise just log that we're missing an issue.
- if len(tracking) == 0:
- if create_new:
- assert labels is not None, "Missing labels when creating new Github issues"
- create_github_issue(paper, labels=labels)
- else:
- print(f"Can't find any Github issue for CSV row: {row}")
- results.append(row)
- continue
-
- results.append(merge(paper, tracking[0]).for_printing())
-
- return results
-
-CSV_FILES_TO_SYNC = {
- 'Cxx17Issues.csv': ['c++17', 'lwg-issue'],
- 'Cxx17Papers.csv': ['c++17', 'wg21-paper'],
- 'Cxx20Issues.csv': ['c++20', 'lwg-issue'],
- 'Cxx20Papers.csv': ['c++20', 'wg21-paper'],
- 'Cxx23Issues.csv': ['c++23', 'lwg-issue'],
- 'Cxx23Papers.csv': ['c++23', 'wg21-paper'],
- 'Cxx2cIssues.csv': ['c++26', 'lwg-issue'],
- 'Cxx2cPapers.csv': ['c++26', 'wg21-paper'],
-}
-
-def main(argv):
- import argparse
- parser = argparse.ArgumentParser(prog='synchronize-status-files',
- description='Synchronize the libc++ conformance status files with Github issues')
- parser.add_argument('--validate-only', action='store_true',
- help="Only perform the data validation of CSV files.")
- parser.add_argument('--create-new', action='store_true',
- help="Create new Github issues for CSV rows that do not correspond to any existing Github issue.")
- parser.add_argument('--load-github-from', type=str,
- help="A json file to load the Github project information from instead of querying the API. This is useful for testing to avoid rate limiting.")
- args = parser.parse_args(argv)
-
- libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
- # Perform data validation for all the CSV files.
- print("Performing data validation of the CSV files")
- for filename in CSV_FILES_TO_SYNC:
- csv = load_csv(libcxx_root / 'docs' / 'Status' / filename)
- for row in csv[1:]: # Skip the header
- if row[0] != "": # Skip separator rows
- PaperInfo.from_csv_row(row)
-
- if args.validate_only:
- return
-
- # Load all the Github issues tracking papers from Github.
- if args.load_github_from:
- print(f"Loading all issues from {args.load_github_from}")
- with open(args.load_github_from, 'r', encoding='utf-8') as f:
- project_info = json.load(f)
- else:
- print("Loading all issues from Github")
- gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
- project_info = json.loads(subprocess.check_output(gh_command_line))
- from_github = [PaperInfo.from_github_issue(i) for i in project_info['items']]
-
- # Synchronize CSV files with the Github issues.
- for (filename, labels) in CSV_FILES_TO_SYNC.items():
- print(f"Synchronizing {filename} with Github issues")
- file = libcxx_root / 'docs' / 'Status' / filename
- csv = load_csv(file)
- synced = sync_csv(csv, from_github, create_new=args.create_new, labels=labels)
- write_csv(file, synced)
-
-if __name__ == '__main__':
- import sys
- main(sys.argv[1:])
More information about the libcxx-commits
mailing list