[llvm] [workflows] Add a new workflow for checking commit access qualifications (PR #93301)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 23:53:50 PDT 2024
================
@@ -0,0 +1,443 @@
+import datetime
+import github
+import re
+import requests
+import time
+import sys
+import re
+
+
+class User:
+ THRESHOLD = 5
+
+ def __init__(self, name, triage_list):
+ self.name = name
+ self.authored = 0
+ self.merged = 0
+ self.reviewed = 0
+ self.triage_list = triage_list
+
+ def add_authored(self, val=1):
+ self.authored += val
+ if self.meets_threshold():
+ print(self.name, "meets the threshold with authored commits")
+ del self.triage_list[self.name]
+
+ def set_authored(self, val):
+ self.authored = 0
+ self.add_authored(val)
+
+ def add_merged(self, val=1):
+ self.merged += val
+ if self.meets_threshold():
+ print(self.name, "meets the threshold with merged commits")
+ del self.triage_list[self.name]
+
+ def add_reviewed(self, val=1):
+ self.reviewed += val
+ if self.meets_threshold():
+ print(self.name, "meets the threshold with reviewed commits")
+ del self.triage_list[self.name]
+
+ def get_total(self):
+ return self.authored + self.merged + self.reviewed
+
+ def meets_threshold(self):
+ return self.get_total() >= self.THRESHOLD
+
+ def __repr__(self):
+ return "{} : a: {} m: {} r: {}".format(
+ self.name, self.authored, self.merged, self.reviewed
+ )
+
+
+def run_graphql_query(
+ query: str, variables: dict, token: str, retry: bool = True
+) -> dict:
+ """
+ This function submits a graphql query and returns the results as a
+ dictionary.
+ """
+ s = requests.Session()
+ retries = requests.adapters.Retry(total=8, backoff_factor=2, status_forcelist=[504])
+ s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
+
+ headers = {
+ "Authorization": "bearer {}".format(token),
+ # See
+ # https://github.blog/2021-11-16-graphql-global-id-migration-update/
+ "X-Github-Next-Global-ID": "1",
+ }
+ request = s.post(
+ url="https://api.github.com/graphql",
+ json={"query": query, "variables": variables},
+ headers=headers,
+ )
+
+ rate_limit = request.headers.get("X-RateLimit-Remaining")
+ print(rate_limit)
+ if rate_limit and int(rate_limit) < 10:
+ reset_time = int(request.headers["X-RateLimit-Reset"])
+ while reset_time - int(time.time()) > 0:
+ time.sleep(60)
+ print(
+ "Waiting until rate limit reset",
+ reset_time - int(time.time()),
+ "seconds remaining",
+ )
+
+ if request.status_code == 200:
+ if "data" not in request.json():
+ print(request.json())
+ sys.exit(1)
+ return request.json()["data"]
+ elif retry:
+ return run_graphql_query(query, variables, token, False)
+ else:
+ raise Exception(
+ "Failed to run graphql query\nquery: {}\nerror: {}".format(
+ query, request.json()
+ )
+ )
+
+
+def check_manual_requests(start_date, token) -> bool:
+ query = """
+ query ($query: String!) {
+ search(query: $query, type: ISSUE, first: 100) {
+ nodes {
+ ... on Issue {
+ body
+ comments (first: 100) {
+ nodes {
+ author {
+ login
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+ formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
+ variables = {
+ "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access"
+ }
+
+ data = run_graphql_query(query, variables, token)
+ users = []
+ for issue in data["search"]["nodes"]:
+ users.extend([user[1:] for user in re.findall("@[^ ,\n]+", issue["body"])])
+ # Do we need to check comments if we are checking mentions??
+ # for comment in issue['comments']['nodes']:
+ # users.append(comment['author']['login'])
+
+ return users
+
+
+def get_num_commits(user, start_date, token) -> bool:
+ variables = {
+ "owner": "llvm",
+ "user": user,
+ "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"),
+ }
+
+ user_query = """
+ query ($user: String!) {
+ user(login: $user) {
+ id
+ }
+ }
+ """
+
+ data = run_graphql_query(user_query, variables, token)
+ variables["user_id"] = data["user"]["id"]
+
+ query = """
+ query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){
+ organization(login: $owner) {
+ teams(query: "llvm-committers" first:1) {
+ nodes {
+ repositories {
+ nodes {
+ ref(qualifiedName: "main") {
+ target {
+ ... on Commit {
+ history(since: $start_date, author: {id: $user_id }) {
+ totalCount
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+ count = 0
+ data = run_graphql_query(query, variables, token)
+ for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
+ count += int(repo["ref"]["target"]["history"]["totalCount"])
+ if count >= User.THRESHOLD:
+ break
+ return count
+
+
+def is_new_committer_query_user(user, start_date, token):
+ user_query = """
+ query {
+ organization(login: "llvm") {
+ id
+ }
+ }
+ """
+
+ data = run_graphql_query(user_query, {}, token)
+ variables = {
+ # We can only check one year of date at a time, so check for contribution between 3 and 4 years ago.
+ "start_date": (start_date - datetime.timedelta(weeks=2 * 52)).strftime(
+ "%Y-%m-%dT%H:%M:%S"
+ ),
+ "org": data["organization"]["id"],
+ "user": user,
+ }
+ query = """
+ query ($user: String!, $start_date: DateTime!, $org:ID!){
+ user(login: $user) {
+ contributionsCollection(from:$start_date, organizationID:$org) {
+ totalCommitContributions
+ }
+ }
+ }
+ """
+
+ data = run_graphql_query(query, variables, token)
+ if int(data["user"]["contributionsCollection"]["totalCommitContributions"]) > 0:
+ return False
+ return True
+
+
+def is_new_committer_query_repo(user, start_date, token):
+ variables = {
+ "user": user,
+ }
+
+ user_query = """
+ query ($user: String!) {
+ user(login: $user) {
+ id
+ }
+ }
+ """
+
+ data = run_graphql_query(user_query, variables, token)
+ variables["owner"] = "llvm"
+ variables["user_id"] = data["user"]["id"]
+ variables["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%S")
+
+ query = """
+ query ($owner: String!, $user_id: ID!){
+ organization(login: $owner) {
+ repository(name: "llvm-project") {
+ ref(qualifiedName: "main") {
+ target {
+ ... on Commit {
+ history(author: {id: $user_id }, first: 5) {
+ nodes {
+ committedDate
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+
+ data = run_graphql_query(query, variables, token)
+ repo = data["organization"]["repository"]
+ commits = repo["ref"]["target"]["history"]["nodes"]
+ if len(commits) == 0:
+ return True
+ committed_date = commits[-1]["committedDate"]
+ if datetime.datetime.strptime(committed_date, "%Y-%m-%dT%H:%M:%SZ") < start_date:
+ return False
+ return True
+
+
+def is_new_committer_pr_author(user, start_date, token):
+ query = """
+ query ($query: String!) {
+ search(query: $query, type: ISSUE, first: 5) {
+ issueCount
+ }
+ }
+ """
+ formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
+ variables = {
+ "owner": "llvm",
+ "repo": "llvm-project",
+ "user": user,
+ "query": f"type:pr author:{user} created:>{formatted_start_date} org:llvm",
+ }
+
+ data = run_graphql_query(query, variables, token)
+ return int(data["search"]["issueCount"]) > 0
+
+
+def is_new_committer(user, start_date, token):
+ try:
+ return is_new_committer_query_repo(user, start_date, token)
+ except:
+ pass
+ return True
+
+
+def get_review_count(user, start_date, token):
+ query = """
+ query ($query: String!) {
+ search(query: $query, type: ISSUE, first: 5) {
+ issueCount
+ }
+ }
+ """
+ formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
+ variables = {
+ "owner": "llvm",
+ "repo": "llvm-project",
+ "user": user,
+ "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
+ }
+
+ data = run_graphql_query(query, variables, token)
+ return int(data["search"]["issueCount"])
+
+
+def count_prs(triage_list, start_date, token):
+ query = """
+ query ($query: String!, $after: String) {
+ search(query: $query, type: ISSUE, first: 100, after: $after) {
+ issueCount,
+ nodes {
+ ... on PullRequest {
+ author {
+ login
+ }
+ mergedBy {
+ login
+ }
+ }
+ }
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ }
+ }
+ """
+ date_begin = start_date
+ date_end = None
+ while date_begin < datetime.datetime.now():
+ date_end = date_begin + datetime.timedelta(days=7)
+ formatted_date_begin = date_begin.strftime("%Y-%m-%dT%H:%M:%S")
+ formatted_date_end = date_end.strftime("%Y-%m-%dT%H:%M:%S")
+ variables = {
+ "query": f"type:pr is:merged merged:{formatted_date_begin}..{formatted_date_end} org:llvm",
+ }
+ has_next_page = True
+ while has_next_page:
+ print(variables)
+ data = run_graphql_query(query, variables, token)
+ for pr in data["search"]["nodes"]:
+ # Users can be None if the user has been deleted.
+ if not pr["author"]:
+ continue
+ author = pr["author"]["login"]
+ if author in triage_list:
+ triage_list[author].add_authored()
+
+ if not pr["mergedBy"]:
+ continue
+ merger = pr["mergedBy"]["login"]
+ if author == merger:
+ continue
+ if merger not in triage_list:
+ continue
+ triage_list[merger].add_merged()
+
+ has_next_page = data["search"]["pageInfo"]["hasNextPage"]
+ if has_next_page:
+ variables["after"] = data["search"]["pageInfo"]["endCursor"]
+ date_begin = date_end
+
+
+def main():
+ token = sys.argv[1]
+ gh = github.Github(login_or_token=token)
+ org = gh.get_organization("llvm")
+ repo = org.get_repo("llvm-project")
+ team = org.get_team_by_slug("llvm-committers")
+ one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
+ triage_list = {}
+ for member in team.get_members():
+ triage_list[member.login] = User(member.login, triage_list)
+
+ print("Start:", len(triage_list), "triagers")
+ # Step 0 Check if users have requested commit access in the last year.
+
+ for user in check_manual_requests(one_year_ago, token):
+ if user in triage_list:
+ print(user, "requested commit access in the last year.")
+ del triage_list[user]
+ print("After Request Check:", len(triage_list), "triagers")
+
+ # Step 1 count all PRs authored or merged
+ count_prs(triage_list, one_year_ago, token)
+
+ print("After PRs:", len(triage_list), "triagers")
+
+ if len(triage_list) == 0:
+ sys.exit(0)
+
+ # Step 2 check for reviews
+ for user in list(triage_list.keys()):
+ review_count = get_review_count(user, one_year_ago, token)
+ triage_list[user].add_reviewed(review_count)
+
+ print("After Reviews:", len(triage_list), "triagers")
+
+ if len(triage_list) == 0:
+ sys.exit(0)
+
+ # Step 3 check for number of commits
+ for user in list(triage_list.keys()):
+ num_commits = get_num_commits(user, one_year_ago, token)
+ # Override the total number of commits to not double count commits and
+ # authored PRs.
+ triage_list[user].set_authored(num_commits)
+
+ print("After Commits:", len(triage_list), "triagers")
+
+ # Step 4 check for new committers
+ for user in list(triage_list.keys()):
+ print("Checking", user)
+ if is_new_committer(user, one_year_ago, token):
+ print("Removing new committer: ", user)
+ del triage_list[user]
+
+ print("Complete:", len(triage_list), "triagers")
+
+ filename = "triagers.log"
+ f = open(filename, "w")
----------------
boomanaiden154 wrote:
Put this into a `with` statement to avoid manually having to call the `f.close()`? Also, a more descriptive name would probably be useful.
https://github.com/llvm/llvm-project/pull/93301
More information about the llvm-commits
mailing list