[llvm] Add script for mapping github logins to emails (PR #118834)
Chris B via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 09:16:51 PST 2024
https://github.com/llvm-beanz created https://github.com/llvm/llvm-project/pull/118834
This script queries GitHub to collect a list of all members of the LLVM organization, then builds a mapping of GitHub users to emails by first checking the user profile, then reviewing recent commits.
The current script identifies 3331 members of the LLVM organization. It can identify likely email addresses for 2371 (71%) of them; 187 (5%) have most recent commits that use GitHub no-reply email addresses; and 773 (23%) have no public email address and no commits mapped to their GitHub account.
This script is proposed to serve as the basis for identifying contact information for eligible voters for LLVM elections.
>From e9cb6ca66db40780a0be152813c977cceb6a752a Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman at me.com>
Date: Thu, 5 Dec 2024 11:12:28 -0600
Subject: [PATCH] Add script for mapping github logins to emails
This script queries GitHub to collect a list of all members of the LLVM
organization, then builds a mapping of GitHub users to emails by first
checking the user profile, then reviewing recent commits.
The current script identifies 3331 members of the LLVM organization. It
can identify likely email addresses for 2371 (71%) of them; 187 (5%)
have most recent commits that use GitHub no-reply email addresses; and
773 (23%) have no public email address and no commits mapped to their
GitHub account.
This script is proposed to serve as the basis for identifying contact
information for eligible voters for LLVM elections.
---
utils/github/contributor.py | 182 ++++++++++++++++++++++++++++++++++++
1 file changed, 182 insertions(+)
create mode 100755 utils/github/contributor.py
diff --git a/utils/github/contributor.py b/utils/github/contributor.py
new file mode 100755
index 00000000000000..8ec764af7d19df
--- /dev/null
+++ b/utils/github/contributor.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+import json
+import sys
+import re
+import os
+import time
+
+from datetime import datetime
+from functools import cache
+
+# Module-level defaults. The functions below bind their own local
+# `contrib_database` / `contrib_database_path` and read the verbose flag from
+# the parsed arguments, so nothing visible in this script reads the first
+# three names.
+contrib_database = {}
+contrib_database_path = os.path.join(os.getcwd(),'contributors.json')
+verbose = False
+# Captured once at import time; ElapsedTime() measures against this value.
+start_time = datetime.now()
+
+def CreateArgParser():
+    """Build the command-line parser for the contributor script.
+
+    Returns an argparse.ArgumentParser that accepts one positional action
+    (register, stats or print) plus the optional --database/-d, --verbose/-v
+    and --filter/-f flags.
+    """
+    default_db = os.path.join(os.getcwd(), 'contributors.json')
+    cli = argparse.ArgumentParser(
+        prog='contributor',
+        description='LLVM GitHub Organization Scripts',
+    )
+    cli.add_argument(
+        'action',
+        choices=['register', 'stats', 'print'],
+        help='Action to perform',
+    )
+    cli.add_argument(
+        '--database', '-d',
+        default=default_db,
+        metavar='path',
+        required=False,
+        help='Path to contributor database',
+    )
+    cli.add_argument(
+        '--verbose', '-v',
+        required=False,
+        action='store_true',
+        help='Enable verbose logging',
+    )
+    cli.add_argument(
+        '--filter', '-f',
+        required=False,
+        default='all',
+        choices=['complete', 'noreply', 'missing', 'all'],
+        help='Filter contributor database',
+    )
+    return cli
+
+@cache
+def ParseArgs():
+    """Parse the command line and return the argparse namespace.
+
+    The result is memoized with functools.cache, so repeated calls from the
+    helpers in this script reuse one parse of sys.argv.
+    """
+    parser = CreateArgParser()
+    return parser.parse_args(sys.argv[1:])
+
+def ElapsedTime():
+    """Return the time since the module-level start_time, as a string."""
+    elapsed = datetime.now() - start_time
+    return str(elapsed)
+
+def Checkpoint(msg=''):
+    """Print a progress line with the elapsed time when --verbose is set.
+
+    A non-empty msg is printed ahead of the elapsed time; with an empty msg
+    only the elapsed time is reported. Prints nothing unless verbose logging
+    was requested on the command line.
+    """
+    if ParseArgs().verbose:
+        if len(msg) == 0:
+            print('Time elapsed: %s' % ElapsedTime())
+        else:
+            print('%s - Time elapsed %s' % (msg, ElapsedTime()))
+
+def InvokeAndDecode(cmd, attempts=10, delay=60):
+    """Run cmd and decode its standard output as JSON, retrying on failure.
+
+    Args:
+        cmd: Argument list passed to subprocess.check_output.
+        attempts: Maximum number of times to run the command.
+        delay: Seconds to wait between a failed attempt and the next one.
+
+    Returns:
+        The decoded JSON value, or None if every attempt failed.
+    """
+    for attempt in range(attempts):
+        try:
+            return json.loads(subprocess.check_output(cmd))
+        # Only retry failures of the command or of decoding its output. A bare
+        # except would also swallow KeyboardInterrupt, making the script
+        # impossible to stop with Ctrl-C while it is retrying.
+        except (subprocess.CalledProcessError, OSError, ValueError):
+            # No point waiting once there is no attempt left to wait for.
+            if attempt + 1 < attempts:
+                time.sleep(delay)
+    return None
+
+def QueryOrgMembers():
+    """Return every member of the llvm GitHub organization.
+
+    Pages through /orgs/llvm/members with the `gh` CLI, 100 entries per page,
+    and stops at the first page that comes back empty.
+    """
+    collected = []
+    page_no = 0
+    while True:
+        page_no += 1
+        request = [
+            'gh', 'api',
+            '-H', 'Accept: application/vnd.github+json',
+            '-H', 'X-GitHub-Api-Version: 2022-11-28',
+            '/orgs/llvm/members?per_page=100&page=%d' % page_no,
+        ]
+        batch = json.loads(subprocess.check_output(request))
+        if len(batch) == 0:
+            break
+        collected.extend(batch)
+    Checkpoint('Finished org query')
+    return collected
+
+def QueryUser(user):
+    """Fetch /users/<user> through the `gh` CLI.
+
+    Returns whatever InvokeAndDecode returns for the request.
+    """
+    endpoint = '/users/%s' % user
+    return InvokeAndDecode([
+        'gh', 'api',
+        '-H', 'Accept: application/vnd.github+json',
+        '-H', 'X-GitHub-Api-Version: 2022-11-28',
+        endpoint,
+    ])
+
+def LookupLastCommit(user):
+    """Query llvm/llvm-project for a single commit authored by user.
+
+    The request asks for one result (per_page=1) filtered by author. Returns
+    whatever InvokeAndDecode returns for the request.
+    """
+    # '-H' and its value must be separate argv entries. Without the comma
+    # Python concatenates the two adjacent string literals into one argument.
+    ghCommand = ['gh', 'api', '-H', 'Accept: application/vnd.github+json',
+                 '-H', 'X-GitHub-Api-Version: 2022-11-28',
+                 '/repos/llvm/llvm-project/commits?author=%s&per_page=1' % user]
+    return InvokeAndDecode(ghCommand)
+
+def LoadContributorDatabase():
+    """Load the contributor database from the path given by --database.
+
+    Returns:
+        The decoded JSON content of the file, or an empty dict when the file
+        does not exist or decodes to an empty (falsy) value.
+    """
+    db_path = ParseArgs().database
+    if not os.path.exists(db_path):
+        Checkpoint('Starting with empty contributor database (%s).' % db_path)
+        return {}
+    with open(db_path, 'r') as file:
+        database = json.load(file)
+    if not database:
+        Checkpoint('Initializing contributor database')
+        return {}
+    Checkpoint('Contributor database loaded %d entries.' % len(database))
+    return database
+
+def WriteContributorDatabase(db):
+    """Serialize db as JSON to the path given by --database.
+
+    The file is opened in write mode, so existing content is replaced.
+    """
+    destination = ParseArgs().database
+    with open(destination, 'w') as out:
+        json.dump(db, out)
+    Checkpoint('Saved database')
+
+def GenerateUserProfile(member):
+    """Build the database record for one organization member.
+
+    The record always carries the member's login. An email is added from the
+    user query when it has a non-empty 'email'; otherwise the author email of
+    the first commit returned by LookupLastCommit is used, if there is one.
+    """
+    login = member['login']
+    Checkpoint('Generating user: %s' % login)
+    profile = {'login': login}
+    details = QueryUser(login)
+    if details and 'email' in details and details['email']:
+        profile['email'] = details['email']
+        return profile
+    # No usable profile email: fall back to the commit lookup.
+    history = LookupLastCommit(login)
+    if history and len(history) > 0:
+        profile['email'] = history[0]['commit']['author']['email']
+    return profile
+
+def RegisterContributors():
+    """Add every organization member missing from the database, then save it.
+
+    Members whose login is already a key in the database are skipped. The
+    database is also written out before every 500th member visited (starting
+    with the first), and once more after the loop.
+    """
+    database = LoadContributorDatabase()
+    roster = QueryOrgMembers()
+    print('%d organization members identified' % len(roster))
+    for visited, member in enumerate(roster):
+        if visited % 500 == 0:
+            Checkpoint('Processed %d' % visited)
+            WriteContributorDatabase(database)
+        login = member['login']
+        # For now skip members that are already in the DB...
+        if login in database:
+            continue
+        record = GenerateUserProfile(member)
+        if record:
+            database[login] = record
+    WriteContributorDatabase(database)
+
+def PrintStats():
+    """Print counts and percentages for each class of database record.
+
+    A record is 'missing email' when it has no 'email' key, 'using noreply'
+    when its email contains 'noreply.github.com', and 'fully registered'
+    otherwise. An empty database reports zero for every line.
+    """
+    database = LoadContributorDatabase()
+    total = len(database)
+    registered = missing = noreply = 0
+    for record in database.values():
+        if 'email' not in record:
+            missing += 1
+        elif 'noreply.github.com' in record['email']:
+            noreply += 1
+        else:
+            registered += 1
+
+    def percent(count):
+        # Guard the division: an empty database would raise ZeroDivisionError.
+        return (count / total) * 100 if total else 0
+
+    print('%d (%d%%) fully registered' % (registered, percent(registered)))
+    print('%d (%d%%) missing email' % (missing, percent(missing)))
+    print('%d (%d%%) using noreply' % (noreply, percent(noreply)))
+    print('%d total records' % total)
+
+def LoadFilteredDatabase():
+    """Load the database and keep only records matching the --filter option.
+
+    'missing' keeps records without an 'email' key, 'noreply' keeps records
+    whose email contains 'noreply.github', 'complete' keeps records with any
+    other email, and 'all' (or any other value) returns the database as is.
+    """
+    records = LoadContributorDatabase()
+    selection = ParseArgs().filter
+
+    def has_email(record):
+        return 'email' in record
+
+    def is_noreply(record):
+        return 'noreply.github' in record['email']
+
+    predicates = {
+        'missing': lambda record: not has_email(record),
+        'noreply': lambda record: has_email(record) and is_noreply(record),
+        'complete': lambda record: has_email(record) and not is_noreply(record),
+    }
+    keep = predicates.get(selection)
+    if keep is None:
+        return records
+    return {login: record for login, record in records.items() if keep(record)}
+
+def Print():
+    """Print one line per record in the filtered database.
+
+    Records with an 'email' key are printed as 'User: login <email>'; the
+    rest are printed with the login only.
+    """
+    for record in LoadFilteredDatabase().values():
+        if 'email' not in record:
+            print('User: %s' % record['login'])
+        else:
+            print('User: %s <%s>' % (record['login'], record['email']))
+
+def main():
+    """Parse the command line and run the requested action.
+
+    With --verbose, the start time is printed before the action and the
+    current time after it.
+    """
+    options = ParseArgs()
+    if options.verbose:
+        print('Beginning processing - %s' % str(start_time))
+
+    handlers = {
+        'register': RegisterContributors,
+        'stats': PrintStats,
+        'print': Print,
+    }
+    handler = handlers.get(options.action)
+    if handler is not None:
+        handler()
+
+    if options.verbose:
+        print('Exiting - %s' % str(datetime.now()))
+
+# Run the command-line entry point only when executed as a script.
+if __name__ == '__main__':
+ main()
More information about the llvm-commits
mailing list