github_stats.py
231 lines
| 8.2 KiB
| text/x-python
|
PythonLexer
/ tools / github_stats.py
Fernando Perez
|
r4427 | #!/usr/bin/env python | ||
"""Simple tools to query github.com and gather stats about issues. | ||||
MinRK
|
r16012 | |||
To generate a report for IPython 2.0, run: | ||||
python github_stats.py --milestone 2.0 --since-tag rel-1.0.0 | ||||
Fernando Perez
|
r4427 | """ | ||
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | |||
MinRK
|
r16012 | import codecs | ||
Thomas Kluyver
|
r4385 | import sys | ||
Fernando Perez
|
r4427 | |||
MinRK
|
r16012 | from argparse import ArgumentParser | ||
Fernando Perez
|
r4427 | from datetime import datetime, timedelta | ||
MinRK
|
r7758 | from subprocess import check_output | ||
MinRK
|
r16790 | |||
MinRK
|
r16012 | from gh_api import ( | ||
get_paged_request, make_auth_header, get_pull_request, is_pull_request, | ||||
MinRK
|
r16790 | get_milestone_id, get_issues_list, get_authors, | ||
MinRK
|
r16012 | ) | ||
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
MinRK
|
r6416 | # Globals | ||
#----------------------------------------------------------------------------- | ||||
# strptime/strftime pattern used both to parse timestamps coming back from
# the GitHub API and to format the `since` query parameter sent to it.
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
# Value sent as the `per_page` query parameter of paged GitHub API requests.
PER_PAGE = 100
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4433 | # Functions | ||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | |||
MinRK
|
def round_hour(dt):
    """Return a copy of ``dt`` truncated to the start of its hour."""
    zeroed_fields = {'minute': 0, 'second': 0, 'microsecond': 0}
    return dt.replace(**zeroed_fields)
Fernando Perez
|
r4427 | |||
Thomas Kluyver
|
r4385 | def _parse_datetime(s): | ||
"""Parse dates in the format returned by the Github API.""" | ||||
MinRK
|
r6416 | if s: | ||
return datetime.strptime(s, ISO8601) | ||||
else: | ||||
return datetime.fromtimestamp(0) | ||||
Thomas Kluyver
|
r4385 | |||
Fernando Perez
|
def issues2dict(issues):
    """Index a list of issues by their ``'number'`` entry.

    When several issues share a number, the last one in the list wins.
    """
    return {issue['number']: issue for issue in issues}
MinRK
|
def split_pulls(all_issues, project="ipython/ipython"):
    """Partition closed issues into plain issues and pull requests.

    Entries recognised by ``is_pull_request`` are replaced by the result of
    ``get_pull_request`` for their number in ``project``; all other entries
    are passed through unchanged.  Returns ``(issues, pulls)``.
    """
    plain_issues, pull_requests = [], []
    for entry in all_issues:
        if not is_pull_request(entry):
            plain_issues.append(entry)
            continue
        pull_requests.append(get_pull_request(project, entry['number'], auth=True))
    return plain_issues, pull_requests
Fernando Perez
|
r4427 | |||
MinRK
|
def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
    """Return the issues (or pull requests) of ``project`` closed after a cutoff.

    ``period`` is either a datetime, used directly as the cutoff, or a
    timedelta, in which case the cutoff is that long before the current UTC
    time, rounded down to the hour.

    With ``pulls`` true the ``pulls`` endpoint is queried, and only entries
    merged after the cutoff whose base ref is ``master`` are kept.  Otherwise
    the ``issues`` endpoint is queried and pull requests are dropped.
    """
    if isinstance(period, timedelta):
        since = round_hour(datetime.utcnow() - period)
    else:
        since = period

    endpoint = 'pulls' if pulls else 'issues'
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (
        project, endpoint, since.strftime(ISO8601), PER_PAGE)
    closed = get_paged_request(url, headers=make_auth_header())

    kept = []
    for entry in closed:
        if _parse_datetime(entry['closed_at']) <= since:
            continue
        if pulls:
            if _parse_datetime(entry['merged_at']) <= since:
                continue
            # filter out PRs not against master (backports)
            if entry['base']['ref'] != 'master':
                continue
        elif is_pull_request(entry):
            continue
        kept.append(entry)
    return kept
Thomas Kluyver
|
r4385 | |||
Fernando Perez
|
r4427 | |||
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues ordered by ``field`` (closing date by default)."""
    ordered = list(issues)
    ordered.sort(key=lambda issue: issue[field], reverse=reverse)
    return ordered
def report(issues, show_urls=False):
    """Print one bullet line per issue with its number and title.

    Backticks in titles are doubled.  With ``show_urls`` true, each number is
    wrapped in a ``:ghpull:`` role when the entry has a ``'merged_at'`` key
    and in a ``:ghissue:`` role otherwise.
    """
    for issue in issues:
        title = issue['title'].replace(u'`', u'``')
        if show_urls:
            role = 'ghpull' if 'merged_at' in issue else 'ghissue'
            line = u'* :%s:`%d`: %s' % (role, issue['number'], title)
        else:
            line = u'* %d: %s' % (issue['number'], title)
        print(line)
Fernando Perez
|
r4427 | |||
Fernando Perez
|
r4433 | #----------------------------------------------------------------------------- | ||
# Main script | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r4427 | |||
Thomas Kluyver
|
if __name__ == "__main__":
    # Command-line entry point: gather closed issues / merged PRs for a
    # project since a tag (or a number of days) and print a reST summary
    # suitable for release notes on stdout.  Progress and deprecation notices
    # go to stderr so they do not pollute the report.

    print("DEPRECATE: github_stats.py is deprecated and it is now recommended "
          "to install `ghpro` from PyPI.", file=sys.stderr)

    # Whether to add reST urls for all issues in printout.
    show_urls = True

    parser = ArgumentParser()
    parser.add_argument('--since-tag', type=str,
        help="The git tag to use for the starting point (typically the last major release)."
    )
    parser.add_argument('--milestone', type=str,
        help="The GitHub milestone to use for filtering issues [optional]."
    )
    parser.add_argument('--days', type=int,
        help="The number of days of data to summarize (use this or --since-tag)."
    )
    parser.add_argument('--project', type=str, default="ipython/ipython",
        help="The project to summarize."
    )
    parser.add_argument('--links', action='store_true', default=False,
        help="Include links to all closed Issues and PRs in the output."
    )

    opts = parser.parse_args()
    tag = opts.since_tag

    # set `since` from days or git tag
    if opts.days:
        since = datetime.utcnow() - timedelta(days=opts.days)
    else:
        if not tag:
            # No tag given: fall back to the most recent tag reachable from HEAD.
            tag = check_output(['git', 'describe', '--abbrev=0']).strip().decode('utf8')
        # `%ai` prints "YYYY-MM-DD HH:MM:SS +hhmm"; split off the UTC offset.
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        tagday, tz = check_output(cmd).strip().decode('utf8').rsplit(' ', 1)
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
        h = int(tz[1:3])
        m = int(tz[3:])
        td = timedelta(hours=h, minutes=m)
        # Convert the tag's local time to UTC: a negative offset means local
        # time is behind UTC, so the offset is added back (and vice versa).
        if tz[0] == '-':
            since += td
        else:
            since -= td

    since = round_hour(since)

    milestone = opts.milestone
    project = opts.project

    print("fetching GitHub stats since %s (tag: %s, milestone: %s)" % (since, tag, milestone), file=sys.stderr)
    if milestone:
        milestone_id = get_milestone_id(project=project, milestone=milestone,
                auth=True)
        issues_and_pulls = get_issues_list(project=project,
                milestone=milestone_id,
                state='closed',
                auth=True,
        )
        issues, pulls = split_pulls(issues_and_pulls, project=project)
    else:
        issues = issues_closed_since(since, project=project, pulls=False)
        pulls = issues_closed_since(since, project=project, pulls=True)

    # For regular reports, it's nice to show them in reverse chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = map(len, (issues, pulls))
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.

    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or contain duplicates.")
    print()

    ncommits = 0
    all_authors = []
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag+'..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits += len(check_output(cmd).splitlines())

        author_cmd = ['git', 'log', '--use-mailmap', "--format=* %aN", since_tag]
        all_authors.extend(check_output(author_cmd).decode('utf-8', 'replace').splitlines())

    pr_authors = []
    for pr in pulls:
        pr_authors.extend(get_authors(pr))
    # NOTE(review): presumably get_authors yields one entry per commit of a
    # PR, and one commit per PR is subtracted to avoid double counting --
    # confirm against gh_api.get_authors.
    ncommits = len(pr_authors) + ncommits - len(pulls)
    if pr_authors:
        # `git check-mailmap` exits with an error when given no contacts, so
        # only run it when there is at least one PR author to normalise.
        author_cmd = ['git', 'check-mailmap'] + pr_authors
        with_email = check_output(author_cmd).decode('utf-8', 'replace').splitlines()
        # Keep only the name part of each "Name <email>" line.
        all_authors.extend([ u'* ' + a.split(' <')[0] for a in with_email ])
    unique_authors = sorted(set(all_authors), key=lambda s: s.lower())

    print("We closed %d issues and merged %d pull requests." % (n_issues, n_pulls))
    if milestone:
        print("The full list can be seen `on GitHub <https://github.com/{project}/issues?q=milestone%3A{milestone}>`__".format(project=project,milestone=milestone)
        )

    print()
    print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
    print()
    print('\n'.join(unique_authors))

    if opts.links:
        print()
        print("GitHub issues and pull requests:")
        print()
        print('Pull Requests (%d):\n' % n_pulls)
        report(pulls, show_urls)
        print()
        print('Issues (%d):\n' % n_issues)
        report(issues, show_urls)