github_stats.py
200 lines
| 7.0 KiB
| text/x-python
|
PythonLexer
/ tools / github_stats.py
Fernando Perez
|
r4427 | #!/usr/bin/env python | ||
"""Simple tools to query github.com and gather stats about issues. | ||||
""" | ||||
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | from __future__ import print_function | ||
Thomas Kluyver
|
r4385 | import json | ||
MinRK
|
r6416 | import re | ||
Thomas Kluyver
|
r4385 | import sys | ||
Fernando Perez
|
r4427 | |||
from datetime import datetime, timedelta | ||||
MinRK
|
r7758 | from subprocess import check_output | ||
MinRK
|
r11584 | from gh_api import get_paged_request, make_auth_header, get_pull_request | ||
Thomas Kluyver
|
r4385 | |||
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
MinRK
|
r6416 | # Globals | ||
#----------------------------------------------------------------------------- | ||||
# strptime/strftime pattern used in this file for timestamps read from and sent to the GitHub API
ISO8601 = "%Y-%m-%dT%H:%M:%SZ" | ||||
# value substituted into the per_page query parameter of the request URLs built below
PER_PAGE = 100 | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4433 | # Functions | ||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | |||
MinRK
|
def get_issues(project="ipython/ipython", state="closed", pulls=False):
    """Request the issues (or pull requests) of a project from the GitHub API.

    Parameters
    ----------
    project : str
        Repository in ``owner/name`` form.
    state : str
        Value passed as the ``state`` query parameter.
    pulls : bool
        If true, query the ``pulls`` endpoint instead of ``issues``.

    Returns
    -------
    Whatever ``get_paged_request`` returns for the constructed URL
    (presumably the combined list of results -- confirm in ``gh_api``).
    """
    if pulls:
        endpoint = 'pulls'
    else:
        endpoint = 'issues'
    url_template = "https://api.github.com/repos/%s/%s?state=%s&per_page=%i"
    url = url_template % (project, endpoint, state, PER_PAGE)
    auth_headers = make_auth_header()
    return get_paged_request(url, headers=auth_headers)
Thomas Kluyver
|
r4385 | |||
MinRK
|
def round_hour(dt):
    """Return a copy of *dt* truncated to the start of its hour.

    Despite the name this never rounds up: minutes, seconds and
    microseconds are simply set to zero.
    """
    zeroed_fields = dict(minute=0, second=0, microsecond=0)
    return dt.replace(**zeroed_fields)
Fernando Perez
|
r4427 | |||
Thomas Kluyver
|
r4385 | def _parse_datetime(s): | ||
"""Parse dates in the format returned by the Github API.""" | ||||
MinRK
|
r6416 | if s: | ||
return datetime.strptime(s, ISO8601) | ||||
else: | ||||
return datetime.fromtimestamp(0) | ||||
Thomas Kluyver
|
r4385 | |||
Fernando Perez
|
r4427 | |||
def issues2dict(issues):
    """Index an iterable of issues by their ``'number'`` entry.

    If several issues share a number, the last one encountered wins.
    """
    return {issue['number']: issue for issue in issues}
def is_pull_request(issue):
    """Return True if the given issue is a pull request.

    An issue counts as a pull request when its ``'pull_request'`` entry
    carries a non-empty ``'html_url'``.  A missing entry, or one that is
    explicitly None, is treated as "not a pull request".
    """
    # ``or {}`` also covers the key being present with a None value, which
    # the plain ``.get(key, {})`` default would not.
    pr_info = issue.get('pull_request') or {}
    return bool(pr_info.get('html_url'))
def split_pulls(all_issues, project="ipython/ipython"):
    """Partition issues into plain issues and pull requests.

    Entries recognised by ``is_pull_request`` are replaced by the result of
    ``get_pull_request(project, number, auth=True)``; every other entry is
    kept unchanged.

    Returns
    -------
    (issues, pulls) : tuple of two lists, each in input order.
    """
    issues, pulls = [], []
    for item in all_issues:
        if not is_pull_request(item):
            issues.append(item)
            continue
        # look up the pull request record by the issue's number
        pulls.append(get_pull_request(project, item['number'], auth=True))
    return issues, pulls
Fernando Perez
|
r4427 | |||
MinRK
|
r11584 | |||
MinRK
|
def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
    """Get all issues (or pull requests) closed since a point in time.

    Parameters
    ----------
    period : datetime or timedelta
        A datetime is used directly as the cutoff.  A timedelta is
        subtracted from the current UTC time and truncated to the hour.
    project : str
        Repository in ``owner/name`` form.
    pulls : bool
        If true, query pull requests instead of issues.

    Returns
    -------
    list
        Entries whose ``closed_at`` is later than the cutoff.  With
        ``pulls=True`` only entries whose ``merged_at`` is also later than
        the cutoff and whose base ref is ``master`` are kept; otherwise
        entries recognised as pull requests are dropped.
    """
    endpoint = 'pulls' if pulls else 'issues'

    since = period
    if isinstance(period, timedelta):
        since = round_hour(datetime.utcnow() - period)

    url_template = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i"
    url = url_template % (project, endpoint, since.strftime(ISO8601), PER_PAGE)
    allclosed = get_paged_request(url, headers=make_auth_header())

    recent = [item for item in allclosed
              if _parse_datetime(item['closed_at']) > since]

    if not pulls:
        return [item for item in recent if not is_pull_request(item)]

    merged = [item for item in recent
              if _parse_datetime(item['merged_at']) > since]
    # filter out PRs not against master (backports)
    return [item for item in merged if item['base']['ref'] == 'master']
Thomas Kluyver
|
r4385 | |||
Fernando Perez
|
r4427 | |||
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues ordered by the value stored under *field*.

    The default field is the closing date; pass ``reverse=True`` for
    descending order.  The input is not modified.
    """
    ordered = list(issues)
    ordered.sort(key=lambda issue: issue[field], reverse=reverse)
    return ordered
def report(issues, show_urls=False):
    """Print a summary of a list of issues, one ``* number: title`` per line.

    Parameters
    ----------
    issues : iterable of dict
        Each entry must provide ``'number'`` and ``'title'``.
    show_urls : bool
        If true, each number is wrapped in a reST role: ``ghpull`` when the
        entry has a ``'merged_at'`` key, ``ghissue`` otherwise.
    """
    # Titles may contain non-ASCII characters.  They are printed as text and
    # left for the output stream to encode: encoding them here would print a
    # bytes repr on Python 3 and break a utf8-wrapped stdout on Python 2.
    for issue in issues:
        if show_urls:
            role = 'ghpull' if 'merged_at' in issue else 'ghissue'
            print('* :%s:`%d`: %s' % (role, issue['number'], issue['title']))
        else:
            print('* %d: %s' % (issue['number'], issue['title']))
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
# Main script | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | |||
Thomas Kluyver
|
if __name__ == "__main__":
    # Usage: github_stats.py [DAYS | TAG]
    #   DAYS (an integer): report on the last DAYS days.
    #   TAG (anything else): report since the commit date of that git tag.
    #   no argument: report since the most recent tag (git describe).

    # deal with unicode
    import codecs
    if sys.version_info[0] < 3:
        # Only Python 2 needs the codec wrapper; Python 3's text stdout
        # would reject the bytes the wrapper produces.
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    # Whether to add reST urls for all issues in printout.
    show_urls = True

    # Fallback window of one month, used only if no usable tag is found.
    days = 30
    tag = None
    if len(sys.argv) > 1:
        try:
            days = int(sys.argv[1])
        except ValueError:
            # not a number of days: treat the argument as a tag name
            tag = sys.argv[1]
    else:
        # git output is bytes; decode so the string operations below work
        # on both Python 2 and 3
        tag = check_output(['git', 'describe', '--abbrev=0']).strip().decode('utf-8')

    if tag:
        # %ai prints the author date as "YYYY-MM-DD HH:MM:SS +ZZZZ"
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        tagday, tz = check_output(cmd).strip().decode('utf-8').rsplit(' ', 1)
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
        # convert the tag's local time to UTC by removing its offset
        h = int(tz[1:3])
        m = int(tz[3:])
        td = timedelta(hours=h, minutes=m)
        if tz[0] == '-':
            since += td
        else:
            since -= td
    else:
        since = datetime.utcnow() - timedelta(days=days)

    since = round_hour(since)

    print("fetching GitHub stats since %s (tag: %s)" % (since, tag), file=sys.stderr)

    # turn off to play interactively without redownloading, use %run -i
    if 1:
        issues = issues_closed_since(since, pulls=False)
        pulls = issues_closed_since(since, pulls=True)

    # For regular reports, it's nice to show them in reverse chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = map(len, (issues, pulls))
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.

    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or contain duplicates.")
    print()
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag+'..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits = len(check_output(cmd).splitlines())

        author_cmd = ['git', 'log', '--use-mailmap', "--format=* %aN", since_tag]
        all_authors = check_output(author_cmd).decode('utf-8', 'replace').splitlines()
        # de-duplicate, then order case-insensitively
        unique_authors = sorted(set(all_authors), key=lambda s: s.lower())
        print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
        print()
        print('\n'.join(unique_authors))
        print()

    print()
    print("We closed a total of %d issues, %d pull requests and %d regular issues;\n"
          "this is the full list (generated with the script \n"
          ":file:`tools/github_stats.py`):" % (n_total, n_pulls, n_issues))
    print()
    print('Pull Requests (%d):\n' % n_pulls)
    report(pulls, show_urls)
    print()
    print('Issues (%d):\n' % n_issues)
    report(issues, show_urls)