[llvm] Add script for mapping github logins to emails (PR #118834)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 5 09:19:58 PST 2024


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {darker}-->


:warning: The Python code formatter, darker, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
darker --check --diff -r 0964328c2960159f66ad232bb2257fbabab3c0ec...e9cb6ca66db40780a0be152813c977cceb6a752a utils/github/contributor.py
``````````

</details>

<details>
<summary>
View the diff from darker here.
</summary>

``````````diff
--- contributor.py	2024-12-05 17:12:28.000000 +0000
+++ contributor.py	2024-12-05 17:19:33.960986 +0000
@@ -10,173 +10,254 @@
 
 from datetime import datetime
 from functools import cache
 
 contrib_database = {}
-contrib_database_path = os.path.join(os.getcwd(),'contributors.json')
+contrib_database_path = os.path.join(os.getcwd(), "contributors.json")
 verbose = False
 start_time = datetime.now()
 
+
 def CreateArgParser():
-  parser = argparse.ArgumentParser(prog='contributor', description='LLVM GitHub Organization Scripts')
-  parser.add_argument('action', choices=['register', 'stats', 'print'], help='Action to perform')
-  parser.add_argument('--database', '-d', default=os.path.join(os.getcwd(),'contributors.json'), metavar='path', required=False, help='Path to contributor database')
-  parser.add_argument('--verbose', '-v', required=False, action='store_true', help='Enable verbose logging')
-  parser.add_argument('--filter', '-f', required=False, default='all', choices=['complete', 'noreply', 'missing', 'all'], help='Filter contributor database')
-  return parser
+    parser = argparse.ArgumentParser(
+        prog="contributor", description="LLVM GitHub Organization Scripts"
+    )
+    parser.add_argument(
+        "action", choices=["register", "stats", "print"], help="Action to perform"
+    )
+    parser.add_argument(
+        "--database",
+        "-d",
+        default=os.path.join(os.getcwd(), "contributors.json"),
+        metavar="path",
+        required=False,
+        help="Path to contributor database",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        required=False,
+        action="store_true",
+        help="Enable verbose logging",
+    )
+    parser.add_argument(
+        "--filter",
+        "-f",
+        required=False,
+        default="all",
+        choices=["complete", "noreply", "missing", "all"],
+        help="Filter contributor database",
+    )
+    return parser
+
 
 @cache
 def ParseArgs():
-  parser = CreateArgParser()
-  return parser.parse_args(sys.argv[1:])
+    parser = CreateArgParser()
+    return parser.parse_args(sys.argv[1:])
+
 
 def ElapsedTime():
-  return str(datetime.now() - start_time)
-
-def Checkpoint(msg=''):
-  if not ParseArgs().verbose:
-    return
-  if len(msg) > 0:
-    print('%s - Time elapsed %s' % (msg, ElapsedTime()))
-  else:
-    print('Time elapsed: %s' % ElapsedTime())
+    return str(datetime.now() - start_time)
+
+
+def Checkpoint(msg=""):
+    if not ParseArgs().verbose:
+        return
+    if len(msg) > 0:
+        print("%s - Time elapsed %s" % (msg, ElapsedTime()))
+    else:
+        print("Time elapsed: %s" % ElapsedTime())
+
 
 def InvokeAndDecode(cmd):
-  for Attempt in range(10):
-    try:
-      status = subprocess.check_output(cmd)
-      return json.loads(status)
-    except:
-      time.sleep(60)
-  return None
+    for Attempt in range(10):
+        try:
+            status = subprocess.check_output(cmd)
+            return json.loads(status)
+        except:
+            time.sleep(60)
+    return None
+
 
 def QueryOrgMembers():
-  members = []
-  page = 1
-  while True:
-    ghCommand = ['gh', 'api', '-H', 'Accept: application/vnd.github+json',
-               '-H', 'X-GitHub-Api-Version: 2022-11-28', '/orgs/llvm/members?per_page=100&page=%d' % page]
-    status = subprocess.check_output(ghCommand)
-    new_members = json.loads(status)
-    if len(new_members) == 0:
-      Checkpoint('Finished org query')
-      return members
-    members.extend(new_members)
-    page += 1
+    members = []
+    page = 1
+    while True:
+        ghCommand = [
+            "gh",
+            "api",
+            "-H",
+            "Accept: application/vnd.github+json",
+            "-H",
+            "X-GitHub-Api-Version: 2022-11-28",
+            "/orgs/llvm/members?per_page=100&page=%d" % page,
+        ]
+        status = subprocess.check_output(ghCommand)
+        new_members = json.loads(status)
+        if len(new_members) == 0:
+            Checkpoint("Finished org query")
+            return members
+        members.extend(new_members)
+        page += 1
+
 
 def QueryUser(user):
-  ghCommand = ['gh', 'api', '-H', 'Accept: application/vnd.github+json',
-               '-H', 'X-GitHub-Api-Version: 2022-11-28', '/users/%s' % user]
-  return InvokeAndDecode(ghCommand)
+    ghCommand = [
+        "gh",
+        "api",
+        "-H",
+        "Accept: application/vnd.github+json",
+        "-H",
+        "X-GitHub-Api-Version: 2022-11-28",
+        "/users/%s" % user,
+    ]
+    return InvokeAndDecode(ghCommand)
+
 
 def LookupLastCommit(user):
-  ghCommand = ['gh', 'api', '-H', 'Accept: application/vnd.github+json',
-               '-H' 'X-GitHub-Api-Version: 2022-11-28',
-               '/repos/llvm/llvm-project/commits?author=%s&per_page=1' % user]
-  return InvokeAndDecode(ghCommand)
+    ghCommand = [
+        "gh",
+        "api",
+        "-H",
+        "Accept: application/vnd.github+json",
+        "-H" "X-GitHub-Api-Version: 2022-11-28",
+        "/repos/llvm/llvm-project/commits?author=%s&per_page=1" % user,
+    ]
+    return InvokeAndDecode(ghCommand)
+
 
 def LoadContributorDatabase():
-  contrib_database_path = ParseArgs().database
-  if not os.path.exists(contrib_database_path):
-    Checkpoint('Starting with empty contributor database (%s).' % contrib_database_path)
+    contrib_database_path = ParseArgs().database
+    if not os.path.exists(contrib_database_path):
+        Checkpoint(
+            "Starting with empty contributor database (%s)." % contrib_database_path
+        )
+        return {}
+    with open(contrib_database_path, "r") as file:
+        data = file.read()
+        contrib_database = json.loads(data)
+        if not contrib_database:
+            Checkpoint("Initializing contributor database")
+            return {}
+        Checkpoint("Contributor database loaded %d entries." % len(contrib_database))
+        return contrib_database
     return {}
-  with open(contrib_database_path, 'r') as file:
-    data = file.read()
-    contrib_database = json.loads(data)
-    if not contrib_database:
-      Checkpoint('Initializing contributor database')
-      return {}
-    Checkpoint('Contributor database loaded %d entries.' % len(contrib_database))
+
+
+def WriteContributorDatabase(db):
+    contrib_database_path = ParseArgs().database
+    with open(contrib_database_path, "w") as file:
+        json.dump(db, file)
+        Checkpoint("Saved database")
+
+
+def GenerateUserProfile(member):
+    Checkpoint("Generating user: %s" % member["login"])
+    user = {"login": member["login"]}
+    userQuery = QueryUser(member["login"])
+    if userQuery and "email" in userQuery and userQuery["email"]:
+        user["email"] = userQuery["email"]
+    else:
+        commits = LookupLastCommit(member["login"])
+        if commits and len(commits) > 0:
+            user["email"] = commits[0]["commit"]["author"]["email"]
+    return user
+
+
+def RegisterContributors():
+    contrib_database = LoadContributorDatabase()
+    orgMembers = QueryOrgMembers()
+    print("%d organization members identified" % len(orgMembers))
+    processed = 0
+    for member in orgMembers:
+        if processed % 500 == 0:
+            Checkpoint("Processed %d" % processed)
+            WriteContributorDatabase(contrib_database)
+        processed += 1
+        # For now skip members that are already in the DB...
+        if member["login"] in contrib_database:
+            continue
+        userData = GenerateUserProfile(member)
+        if userData:
+            contrib_database[member["login"]] = userData
+    WriteContributorDatabase(contrib_database)
+
+
+def PrintStats():
+    registered = 0
+    missing = 0
+    noreply = 0
+    contrib_database = LoadContributorDatabase()
+    for login, record in contrib_database.items():
+        if "email" not in record:
+            missing += 1
+            continue
+        if "noreply.github.com" in record["email"]:
+            noreply += 1
+            continue
+        registered += 1
+    print(
+        "%d (%d%%) fully registered"
+        % (registered, (registered / len(contrib_database)) * 100)
+    )
+    print(
+        "%d (%d%%) missing email" % (missing, (missing / len(contrib_database)) * 100)
+    )
+    print(
+        "%d (%d%%) using noreply" % (noreply, (noreply / len(contrib_database)) * 100)
+    )
+    print("%d total records" % len(contrib_database))
+
+
+def LoadFilteredDatabase():
+    contrib_database = LoadContributorDatabase()
+    filter = ParseArgs().filter
+    if filter == "all":
+        return contrib_database
+    if filter == "missing":
+        return {
+            key: value
+            for key, value in contrib_database.items()
+            if "email" not in value
+        }
+    if filter == "noreply":
+        return {
+            key: value
+            for key, value in contrib_database.items()
+            if "email" in value and "noreply.github" in value["email"]
+        }
+    if filter == "complete":
+        return {
+            key: value
+            for key, value in contrib_database.items()
+            if "email" in value and "noreply.github" not in value["email"]
+        }
     return contrib_database
-  return {}
-
-def WriteContributorDatabase(db):
-  contrib_database_path = ParseArgs().database
-  with open(contrib_database_path, 'w') as file:
-    json.dump(db, file)
-    Checkpoint('Saved database')
-
-def GenerateUserProfile(member):
-  Checkpoint('Generating user: %s' % member['login'])
-  user = {'login': member['login']}
-  userQuery = QueryUser(member['login'])
-  if userQuery and 'email' in userQuery and userQuery['email']:
-    user['email'] = userQuery['email']
-  else:
-    commits = LookupLastCommit(member['login'])
-    if commits and len(commits) > 0:
-      user['email'] = commits[0]['commit']['author']['email']
-  return user
-
-def RegisterContributors():
-  contrib_database = LoadContributorDatabase()
-  orgMembers = QueryOrgMembers()
-  print('%d organization members identified' % len(orgMembers))
-  processed = 0
-  for member in orgMembers:
-    if processed % 500 == 0:
-      Checkpoint('Processed %d' % processed)
-      WriteContributorDatabase(contrib_database)
-    processed += 1
-    # For now skip members that are already in the DB...
-    if member['login'] in contrib_database:
-      continue
-    userData = GenerateUserProfile(member)
-    if userData:
-      contrib_database[member['login']] = userData
-  WriteContributorDatabase(contrib_database)
-
-def PrintStats():
-  registered = 0
-  missing = 0
-  noreply = 0
-  contrib_database = LoadContributorDatabase()
-  for login, record in contrib_database.items():
-    if 'email' not in record:
-      missing += 1
-      continue
-    if 'noreply.github.com' in record['email']:
-      noreply += 1
-      continue
-    registered += 1
-  print('%d (%d%%) fully registered' % (registered, (registered/len(contrib_database)) * 100))
-  print('%d (%d%%) missing email' % (missing, (missing/len(contrib_database)) * 100))
-  print('%d (%d%%) using noreply' % (noreply, (noreply/len(contrib_database)) * 100))
-  print('%d total records' % len(contrib_database))
-
-def LoadFilteredDatabase():
-  contrib_database = LoadContributorDatabase()
-  filter = ParseArgs().filter
-  if filter == 'all':
-    return contrib_database
-  if filter == 'missing':
-    return { key: value for key, value in contrib_database.items() if 'email' not in value }
-  if filter == 'noreply':
-    return { key: value for key, value in contrib_database.items() if 'email' in value and 'noreply.github' in value['email'] }
-  if filter == 'complete':
-    return { key: value for key, value in contrib_database.items() if 'email' in value and 'noreply.github' not in value['email'] }
-  return contrib_database
+
 
 def Print():
-  contrib_database = LoadFilteredDatabase()
-  for key, value in contrib_database.items():
-    if 'email' in value:
-      print('User: %s <%s>' % (value['login'], value['email']))
-    else:
-      print('User: %s' % value['login'])
+    contrib_database = LoadFilteredDatabase()
+    for key, value in contrib_database.items():
+        if "email" in value:
+            print("User: %s <%s>" % (value["login"], value["email"]))
+        else:
+            print("User: %s" % value["login"])
+
 
 def main():
-  args = ParseArgs()
-  if args.verbose:
-    print('Beginning processing - %s' % str(start_time))
-  if args.action == 'register':
-    RegisterContributors()
-  if args.action == 'stats':
-    PrintStats()
-  if args.action == 'print':
-    Print()
-
-  if args.verbose:
-    print('Exiting - %s' % str(datetime.now()))
-
-if __name__ == '__main__':
-  main()
+    args = ParseArgs()
+    if args.verbose:
+        print("Beginning processing - %s" % str(start_time))
+    if args.action == "register":
+        RegisterContributors()
+    if args.action == "stats":
+        PrintStats()
+    if args.action == "print":
+        Print()
+
+    if args.verbose:
+        print("Exiting - %s" % str(datetime.now()))
+
+
+if __name__ == "__main__":
+    main()

``````````

</details>


https://github.com/llvm/llvm-project/pull/118834


More information about the llvm-commits mailing list