[llvm] workflows/commit-access-review: Use Python APIs for graphql queries (PR #108903)

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 17:47:45 PDT 2024


https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/108903

>From 726931815e37c71e5c3343cc2c8e44b734ab61ba Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:31:17 -0700
Subject: [PATCH 1/3] workflows/commit-access-review: Use Python APIs for
 graphql queries

---
 .github/workflows/commit-access-review.py | 95 +++++++----------------
 llvm/utils/git/requirements.txt           | 12 +--
 llvm/utils/git/requirements.txt.in        |  4 +-
 3 files changed, 35 insertions(+), 76 deletions(-)

diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index 8ea9b1fcc2fb08..ba63aac70a565c 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -62,57 +62,7 @@ def __repr__(self):
         )
 
 
-def run_graphql_query(
-    query: str, variables: dict, token: str, retry: bool = True
-) -> dict:
-    """
-    This function submits a graphql query and returns the results as a
-    dictionary.
-    """
-    s = requests.Session()
-    retries = requests.adapters.Retry(total=8, backoff_factor=2, status_forcelist=[504])
-    s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
-
-    headers = {
-        "Authorization": "bearer {}".format(token),
-        # See
-        # https://github.blog/2021-11-16-graphql-global-id-migration-update/
-        "X-Github-Next-Global-ID": "1",
-    }
-    request = s.post(
-        url="https://api.github.com/graphql",
-        json={"query": query, "variables": variables},
-        headers=headers,
-    )
-
-    rate_limit = request.headers.get("X-RateLimit-Remaining")
-    print(rate_limit)
-    if rate_limit and int(rate_limit) < 10:
-        reset_time = int(request.headers["X-RateLimit-Reset"])
-        while reset_time - int(time.time()) > 0:
-            time.sleep(60)
-            print(
-                "Waiting until rate limit reset",
-                reset_time - int(time.time()),
-                "seconds remaining",
-            )
-
-    if request.status_code == 200:
-        if "data" not in request.json():
-            print(request.json())
-            sys.exit(1)
-        return request.json()["data"]
-    elif retry:
-        return run_graphql_query(query, variables, token, False)
-    else:
-        raise Exception(
-            "Failed to run graphql query\nquery: {}\nerror: {}".format(
-                query, request.json()
-            )
-        )
-
-
-def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str]:
+def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> list[str]:
     """
     Return a list of users who have been asked since ``start_date`` if they
     want to keep their commit access.
@@ -140,7 +90,8 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str
         "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access"
     }
 
-    data = run_graphql_query(query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    data = d["data"]
     users = []
     for issue in data["search"]["nodes"]:
         users.extend([user[1:] for user in re.findall("@[^ ,\n]+", issue["body"])])
@@ -148,7 +99,7 @@ def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str
     return users
 
 
-def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int:
+def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
     """
     Get number of commits that ``user`` has been made since ``start_date`.
     """
@@ -166,7 +117,8 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
         }
     """
 
-    data = run_graphql_query(user_query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+    data = d["data"]
     variables["user_id"] = data["user"]["id"]
 
     query = """
@@ -193,7 +145,8 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
         }
      """
     count = 0
-    data = run_graphql_query(query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    data = d["data"]
     for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
         count += int(repo["ref"]["target"]["history"]["totalCount"])
         if count >= User.THRESHOLD:
@@ -202,7 +155,7 @@ def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int
 
 
 def is_new_committer_query_repo(
-    user: str, start_date: datetime.datetime, token: str
+  gh: github.Github, user: str, start_date: datetime.datetime
 ) -> bool:
     """
     Determine if ``user`` is a new committer.  A new committer can keep their
@@ -220,7 +173,8 @@ def is_new_committer_query_repo(
         }
     """
 
-    data = run_graphql_query(user_query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+    data = d["data"]
     variables["owner"] = "llvm"
     variables["user_id"] = data["user"]["id"]
     variables["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%S")
@@ -245,7 +199,8 @@ def is_new_committer_query_repo(
         }
      """
 
-    data = run_graphql_query(query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    data = d["data"]
     repo = data["organization"]["repository"]
     commits = repo["ref"]["target"]["history"]["nodes"]
     if len(commits) == 0:
@@ -256,18 +211,18 @@ def is_new_committer_query_repo(
     return True
 
 
-def is_new_committer(user: str, start_date: datetime.datetime, token: str) -> bool:
+def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime) -> bool:
     """
     Wrapper around is_new_commiter_query_repo to handle exceptions.
     """
     try:
-        return is_new_committer_query_repo(user, start_date, token)
+        return is_new_committer_query_repo(gh, user, start_date)
     except:
         pass
     return True
 
 
-def get_review_count(user: str, start_date: datetime.datetime, token: str) -> int:
+def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
     """
     Return the number of reviews that ``user`` has done since ``start_date``.
     """
@@ -286,11 +241,12 @@ def get_review_count(user: str, start_date: datetime.datetime, token: str) -> in
         "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
     }
 
-    data = run_graphql_query(query, variables, token)
+    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    data = d["data"]
     return int(data["search"]["issueCount"])
 
 
-def count_prs(triage_list: dict, start_date: datetime.datetime, token: str):
+def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
     """
     Fetch all the merged PRs for the project since ``start_date`` and update
     ``triage_list`` with the number of PRs merged for each user.
@@ -329,7 +285,8 @@ def count_prs(triage_list: dict, start_date: datetime.datetime, token: str):
         has_next_page = True
         while has_next_page:
             print(variables)
-            data = run_graphql_query(query, variables, token)
+            res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+            data = d["data"]
             for pr in data["search"]["nodes"]:
                 # Users can be None if the user has been deleted.
                 if not pr["author"]:
@@ -365,14 +322,14 @@ def main():
 
     print("Start:", len(triage_list), "triagers")
     # Step 0 Check if users have requested commit access in the last year.
-    for user in check_manual_requests(one_year_ago, token):
+    for user in check_manual_requests(gh, one_year_ago):
         if user in triage_list:
             print(user, "requested commit access in the last year.")
             del triage_list[user]
     print("After Request Check:", len(triage_list), "triagers")
 
     # Step 1 count all PRs authored or merged
-    count_prs(triage_list, one_year_ago, token)
+    count_prs(gh, triage_list, one_year_ago)
 
     print("After PRs:", len(triage_list), "triagers")
 
@@ -381,7 +338,7 @@ def main():
 
     # Step 2 check for reviews
     for user in list(triage_list.keys()):
-        review_count = get_review_count(user, one_year_ago, token)
+        review_count = get_review_count(gh, user, one_year_ago)
         triage_list[user].add_reviewed(review_count)
 
     print("After Reviews:", len(triage_list), "triagers")
@@ -391,7 +348,7 @@ def main():
 
     # Step 3 check for number of commits
     for user in list(triage_list.keys()):
-        num_commits = get_num_commits(user, one_year_ago, token)
+        num_commits = get_num_commits(gh, user, one_year_ago)
         # Override the total number of commits to not double count commits and
         # authored PRs.
         triage_list[user].set_authored(num_commits)
@@ -401,7 +358,7 @@ def main():
     # Step 4 check for new committers
     for user in list(triage_list.keys()):
         print("Checking", user)
-        if is_new_committer(user, one_year_ago, token):
+        if is_new_committer(gh, user, one_year_ago):
             print("Removing new committer: ", user)
             del triage_list[user]
 
diff --git a/llvm/utils/git/requirements.txt b/llvm/utils/git/requirements.txt
index 9ed52610c53435..bbb9059b6b2600 100644
--- a/llvm/utils/git/requirements.txt
+++ b/llvm/utils/git/requirements.txt
@@ -222,9 +222,9 @@ pycparser==2.22 \
     --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
     --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
     # via cffi
-pygithub==2.2.0 \
-    --hash=sha256:41042ea53e4c372219db708c38d2ca1fd4fadab75475bac27d89d339596cfad1 \
-    --hash=sha256:e39be7c4dc39418bdd6e3ecab5931c636170b8b21b4d26f9ecf7e6102a3b51c3
+pygithub==2.4.0 \
+    --hash=sha256:6601e22627e87bac192f1e2e39c6e6f69a43152cfb8f307cee575879320b3051 \
+    --hash=sha256:81935aa4bdc939fba98fee1cb47422c09157c56a27966476ff92775602b9ee24
     # via -r requirements.txt.in
 pyjwt[crypto]==2.9.0 \
     --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \
@@ -254,9 +254,9 @@ typing-extensions==4.12.2 \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
     # via pygithub
-urllib3==2.2.2 \
-    --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
-    --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
+urllib3==2.2.3 \
+    --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \
+    --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9
     # via
     #   pygithub
     #   requests
diff --git a/llvm/utils/git/requirements.txt.in b/llvm/utils/git/requirements.txt.in
index 512b80b60e1d2d..e880e94e1de80e 100644
--- a/llvm/utils/git/requirements.txt.in
+++ b/llvm/utils/git/requirements.txt.in
@@ -4,6 +4,8 @@
 # pip-compile -o requirements.txt  requirements.txt.in
 
 certifi>=2023.7.22  # https://security.snyk.io/vuln/SNYK-PYTHON-CERTIFI-5805047
-PyGithub==2.2.0 # >=1.59.1 For WorkflowRun.name
+PyGithub==2.4.0 # >=1.59.1 For WorkflowRun.name
                 # >= 2.2.0 for permission arg to Repository.get_collaborators
+                # >= Fix for https://github.com/PyGithub/PyGithub/issues/3001
+                # (variables in graphql query).
 GitPython>=3.1.32  # https://security.snyk.io/vuln/SNYK-PYTHON-GITPYTHON-5840584

>From b5d36be6cbbfedf78726a7f6baf2554d631d982d Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:42:12 -0700
Subject: [PATCH 2/3] Fix formatting

---
 .github/workflows/commit-access-review.py | 30 +++++++++++++++++------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index ba63aac70a565c..e444e0a1a177a8 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -62,7 +62,9 @@ def __repr__(self):
         )
 
 
-def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> list[str]:
+def check_manual_requests(
+    gh: github.Github, start_date: datetime.datetime
+) -> list[str]:
     """
     Return a list of users who have been asked since ``start_date`` if they
     want to keep their commit access.
@@ -90,7 +92,9 @@ def check_manual_requests(gh: github.Github, start_date: datetime.datetime) -> l
         "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access"
     }
 
-    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=query, variables=variables
+    )
     data = d["data"]
     users = []
     for issue in data["search"]["nodes"]:
@@ -117,7 +121,9 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
         }
     """
 
-    res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=user_query, variables=variables
+    )
     data = d["data"]
     variables["user_id"] = data["user"]["id"]
 
@@ -155,7 +161,7 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
 
 
 def is_new_committer_query_repo(
-  gh: github.Github, user: str, start_date: datetime.datetime
+    gh: github.Github, user: str, start_date: datetime.datetime
 ) -> bool:
     """
     Determine if ``user`` is a new committer.  A new committer can keep their
@@ -211,7 +217,9 @@ def is_new_committer_query_repo(
     return True
 
 
-def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime) -> bool:
+def is_new_committer(
+    gh: github.Github, user: str, start_date: datetime.datetime
+) -> bool:
     """
     Wrapper around is_new_commiter_query_repo to handle exceptions.
     """
@@ -222,7 +230,9 @@ def is_new_committer(gh: github.Github, user: str, start_date: datetime.datetime
     return True
 
 
-def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
+def get_review_count(
+    gh: github.Github, user: str, start_date: datetime.datetime
+) -> int:
     """
     Return the number of reviews that ``user`` has done since ``start_date``.
     """
@@ -241,7 +251,9 @@ def get_review_count(gh: github.Github, user: str, start_date: datetime.datetime
         "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
     }
 
-    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=query, variables=variables
+    )
     data = d["data"]
     return int(data["search"]["issueCount"])
 
@@ -285,7 +297,9 @@ def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetim
         has_next_page = True
         while has_next_page:
             print(variables)
-            res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+            res_header, d = gh._Github__requester.graphql_query(
+                query=query, variables=variables
+            )
             data = d["data"]
             for pr in data["search"]["nodes"]:
                 # Users can be None if the user has been deleted.

>From d2941dadc4f5413972b92d0c6f9344cf75cca2df Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 16 Sep 2024 17:47:22 -0700
Subject: [PATCH 3/3] Fix formatting

---
 .github/workflows/commit-access-review.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py
index e444e0a1a177a8..84b3867058ac3f 100644
--- a/.github/workflows/commit-access-review.py
+++ b/.github/workflows/commit-access-review.py
@@ -151,7 +151,9 @@ def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime)
         }
      """
     count = 0
-    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=query, variables=variables
+    )
     data = d["data"]
     for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
         count += int(repo["ref"]["target"]["history"]["totalCount"])
@@ -179,7 +181,9 @@ def is_new_committer_query_repo(
         }
     """
 
-    res_header, d = gh._Github__requester.graphql_query(query=user_query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=user_query, variables=variables
+    )
     data = d["data"]
     variables["owner"] = "llvm"
     variables["user_id"] = data["user"]["id"]
@@ -205,7 +209,9 @@ def is_new_committer_query_repo(
         }
      """
 
-    res_header, d = gh._Github__requester.graphql_query(query=query, variables=variables)
+    res_header, d = gh._Github__requester.graphql_query(
+        query=query, variables=variables
+    )
     data = d["data"]
     repo = data["organization"]["repository"]
     commits = repo["ref"]["target"]["history"]["nodes"]



More information about the llvm-commits mailing list