[llvm] workflows/commit-access-review: Use Python APIs for graphql queries (PR #108903)
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 17:47:45 PDT 2024
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/108903
>From 726931815e37c71e5c3343cc2c8e44b734ab61ba Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:31:17 -0700
Subject: [PATCH 1/3] workflows/commit-access-review: Use Python APIs for
graphql queries
---
.github/workflows/commit-access-review.py | 95 +++++++----------------
llvm/utils/git/requirements.txt | 12 +--
llvm/utils/git/requirements.txt.in | 4 +-
3 files changed, 35 insertions(+), 76 deletions(-)
diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index 8ea9b1fcc2fb08..ba63aac70a565c 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -62,57 +62,7 @@ def __repr__(self):
)
-def run_graphql_query(
- query: str, variables: dict, token: str, retry: bool = True
-) -> dict:
- """
- This function submits a graphql query and returns the results as a
- dictionary.
- """
- s = requests.Session()
- retries = requests.adapters.Retry(total=8, backoff_factor=2, status_forcelist=[504])
- s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
-
- headers = {
- "Authorization": "bearer {}".format(token),
- # See
- # https://github.blog/2021-11-16-graphql-global-id-migration-update/
- "X-Github-Next-Global-ID": "1",
- }
- request = s.post(
- url="https://api.github.com/graphql",
- json={"query": query, "variables": variables},
- headers=headers,
- )
-
- rate_limit = request.headers.get("X-RateLimit-Remaining")
- print(rate_limit)
- if rate_limit and int(rate_limit) < 10:
- reset_time = int(request.headers["X-RateLimit-Reset"])
- while reset_time - int(time.time()) > 0:
- time.sleep(60)
- print(
- "Waiting until rate limit reset",
- reset_time - int(time.time()),
- "seconds remaining",
- )
-
- if request.status_code == 200:
- if "data" not in request.json():
- print(request.json())
- sys.exit(1)
- return request.json()["data"]
- elif retry:
- return run_graphql_query(query, variables, token, False)
- else:
- raise Exception(
- "Failed to run graphql query\nquery: {}\nerror: {}".format(
- query, request.json()
- )
- )
-
-
-def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str]:
+def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> list[str]:
"""
Return a list of users who have been asked since ``start_date`` if they
want to keep their commit access.
@@ -140,7 +90,8 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str
"query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access"
}
- data = run_graphql_query(query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ data = d["data"]
users = []
for issue in data["search"]["nodes"]:
users.extend([user[1:] for user in re.findall("@[^ ,\n]+", issue["body"])])
@@ -148,7 +99,7 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str
return users
-def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int:
+def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
"""
Get number of commits that ``user`` has been made since ``start_date`.
"""
@@ -166,7 +117,8 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
}
"""
- data = run_graphql_query(user_query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+ data = d["data"]
variables["user_id"] = data["user"]["id"]
query = """
@@ -193,7 +145,8 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
}
"""
count = 0
- data = run_graphql_query(query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ data = d["data"]
for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
count += int(repo["ref"]["target"]["history"]["totalCount"])
if count >= User.THRESHOLD:
@@ -202,7 +155,7 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
def is_new_committer_query_repo(
- user: str, start_date: datetime.datetime, token: str
+ gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
"""
Determine if ``user`` is a new committer. A new committer can keep their
@@ -220,7 +173,8 @@ def is_new_committer_query_repo(
}
"""
- data = run_graphql_query(user_query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+ data = d["data"]
variables["owner"] = "llvm"
variables["user_id"] = data["user"]["id"]
variables["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%S")
@@ -245,7 +199,8 @@ def is_new_committer_query_repo(
}
"""
- data = run_graphql_query(query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ data = d["data"]
repo = data["organization"]["repository"]
commits = repo["ref"]["target"]["history"]["nodes"]
if len(commits) == 0:
@@ -256,18 +211,18 @@ def is_new_committer_query_repo(
return True
-def is_new_committer(user: str, start_date: datetime.datetime, token: str) -> bool:
+def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime) -> bool:
"""
Wrapper around is_new_commiter_query_repo to handle exceptions.
"""
try:
- return is_new_committer_query_repo(user, start_date, token)
+ return is_new_committer_query_repo(gh, user, start_date)
except:
pass
return True
-def get_review_count(user: str, start_date: datetime.datetime, token: str) -> int:
+def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
"""
Return the number of reviews that ``user`` has done since ``start_date``.
"""
@@ -286,11 +241,12 @@ def get_review_count(user: str, start_date: datetime.datetime, token: str) -> in
"query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
}
- data = run_graphql_query(query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ data = d["data"]
return int(data["search"]["issueCount"])
-def count_prs(triage_list: dict, start_date: datetime.datetime, token: str):
+def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
"""
Fetch all the merged PRs for the project since ``start_date`` and update
``triage_list`` with the number of PRs merged for each user.
@@ -329,7 +285,8 @@ def count_prs(triage_list: dict, start_date: datetime.datetime, token: str):
has_next_page = True
while has_next_page:
print(variables)
- data = run_graphql_query(query, variables, token)
+ res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ data = d["data"]
for pr in data["search"]["nodes"]:
# Users can be None if the user has been deleted.
if not pr["author"]:
@@ -365,14 +322,14 @@ def main():
print("Start:", len(triage_list), "triagers")
# Step 0 Check if users have requested commit access in the last year.
- for user in check_manual_requests(one_year_ago, token):
+ for user in check_manual_requests(gh, one_year_ago):
if user in triage_list:
print(user, "requested commit access in the last year.")
del triage_list[user]
print("After Request Check:", len(triage_list), "triagers")
# Step 1 count all PRs authored or merged
- count_prs(triage_list, one_year_ago, token)
+ count_prs(gh, triage_list, one_year_ago)
print("After PRs:", len(triage_list), "triagers")
@@ -381,7 +338,7 @@ def main():
# Step 2 check for reviews
for user in list(triage_list.keys()):
- review_count = get_review_count(user, one_year_ago, token)
+ review_count = get_review_count(gh, user, one_year_ago)
triage_list[user].add_reviewed(review_count)
print("After Reviews:", len(triage_list), "triagers")
@@ -391,7 +348,7 @@ def main():
# Step 3 check for number of commits
for user in list(triage_list.keys()):
- num_commits = get_num_commits(user, one_year_ago, token)
+ num_commits = get_num_commits(gh, user, one_year_ago)
# Override the total number of commits to not double count commits and
# authored PRs.
triage_list[user].set_authored(num_commits)
@@ -401,7 +358,7 @@ def main():
# Step 4 check for new committers
for user in list(triage_list.keys()):
print("Checking", user)
- if is_new_committer(user, one_year_ago, token):
+ if is_new_committer(gh, user, one_year_ago):
print("Removing new committer: ", user)
del triage_list[user]
diff --git a/llvm/utils/git/requirements.txt b/llvm/utils/git/requirements.txt
index 9ed52610c53435..bbb9059b6b2600 100644
--- a/llvm/utils/git/requirements.txt
+++ b/llvm/utils/git/requirements.txt
@@ -222,9 +222,9 @@ pycparser==2.22 \
--hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
--hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
# via cffi
-pygithub==2.2.0 \
- --hash=sha256:41042ea53e4c372219db708c38d2ca1fd4fadab75475bac27d89d339596cfad1 \
- --hash=sha256:e39be7c4dc39418bdd6e3ecab5931c636170b8b21b4d26f9ecf7e6102a3b51c3
+pygithub==2.4.0 \
+ --hash=sha256:6601e22627e87bac192f1e2e39c6e6f69a43152cfb8f307cee575879320b3051 \
+ --hash=sha256:81935aa4bdc939fba98fee1cb47422c09157c56a27966476ff92775602b9ee24
# via -r requirements.txt.in
pyjwt[crypto]==2.9.0 \
--hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \
@@ -254,9 +254,9 @@ typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
# via pygithub
-urllib3==2.2.2 \
- --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
- --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
+urllib3==2.2.3 \
+ --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \
+ --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9
# via
# pygithub
# requests
diff --git a/llvm/utils/git/requirements.txt.in b/llvm/utils/git/requirements.txt.in
index 512b80b60e1d2d..e880e94e1de80e 100644
--- a/llvm/utils/git/requirements.txt.in
+++ b/llvm/utils/git/requirements.txt.in
@@ -4,6 +4,8 @@
# pip-compile -o requirements.txt requirements.txt.in
certifi>=2023.7.22 # https://security.snyk.io/vuln/SNYK-PYTHON-CERTIFI-5805047
-PyGithub==2.2.0 # >=1.59.1 For WorkflowRun.name
+PyGithub==2.4.0 # >=1.59.1 For WorkflowRun.name
# >= 2.2.0 for permission arg to Repository.get_collaborators
+ # >= 2.4.0 for fix for https://github.com/PyGithub/PyGithub/issues/3001
+ # (variables in graphql query).
GitPython>=3.1.32 # https://security.snyk.io/vuln/SNYK-PYTHON-GITPYTHON-5840584
>From b5d36be6cbbfedf78726a7f6baf2554d631d982d Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:42:12 -0700
Subject: [PATCH 2/3] Fix formatting
---
.github/workflows/commit-access-review.py | 30 +++++++++++++++++------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index ba63aac70a565c..e444e0a1a177a8 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -62,7 +62,9 @@ def __repr__(self):
)
-def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> list[str]:
+def check_manual_requests(
+ gh: github.Github, start_date: datetime.datetime
+) -> list[str]:
"""
Return a list of users who have been asked since ``start_date`` if they
want to keep their commit access.
@@ -90,7 +92,9 @@ def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> l
"query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access"
}
- res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=query, variables=variables
+ )
data = d["data"]
users = []
for issue in data["search"]["nodes"]:
@@ -117,7 +121,9 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
}
"""
- res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=user_query, variables=variables
+ )
data = d["data"]
variables["user_id"] = data["user"]["id"]
@@ -155,7 +161,7 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
def is_new_committer_query_repo(
- gh: github.Github, user: str, start_date: datetime.datetime
+ gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
"""
Determine if ``user`` is a new committer. A new committer can keep their
@@ -211,7 +217,9 @@ def is_new_committer_query_repo(
return True
-def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime) -> bool:
+def is_new_committer(
+ gh: github.Github, user: str, start_date: datetime.datetime
+) -> bool:
"""
Wrapper around is_new_commiter_query_repo to handle exceptions.
"""
@@ -222,7 +230,9 @@ def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime
return True
-def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
+def get_review_count(
+ gh: github.Github, user: str, start_date: datetime.datetime
+) -> int:
"""
Return the number of reviews that ``user`` has done since ``start_date``.
"""
@@ -241,7 +251,9 @@ def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime
"query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
}
- res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=query, variables=variables
+ )
data = d["data"]
return int(data["search"]["issueCount"])
@@ -285,7 +297,9 @@ def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetim
has_next_page = True
while has_next_page:
print(variables)
- res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=query, variables=variables
+ )
data = d["data"]
for pr in data["search"]["nodes"]:
# Users can be None if the user has been deleted.
>From d2941dadc4f5413972b92d0c6f9344cf75cca2df Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:47:22 -0700
Subject: [PATCH 3/3] Fix formatting
---
.github/workflows/commit-access-review.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index e444e0a1a177a8..84b3867058ac3f 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -151,7 +151,9 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
}
"""
count = 0
- res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=query, variables=variables
+ )
data = d["data"]
for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
count += int(repo["ref"]["target"]["history"]["totalCount"])
@@ -179,7 +181,9 @@ def is_new_committer_query_repo(
}
"""
- res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=user_query, variables=variables
+ )
data = d["data"]
variables["owner"] = "llvm"
variables["user_id"] = data["user"]["id"]
@@ -205,7 +209,9 @@ def is_new_committer_query_repo(
}
"""
- res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+ res_header, d = gh._Github__requester.graphql_query(
+ query=query, variables=variables
+ )
data = d["data"]
repo = data["organization"]["repository"]
commits = repo["ref"]["target"]["history"]["nodes"]
More information about the llvm-commits
mailing list