##// END OF EJS Templates
teach github_stats about milestones...
MinRK -
Show More
@@ -1,194 +1,209 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Simple tools to query github.com and gather stats about issues.
2 """Simple tools to query github.com and gather stats about issues.
3
4 To generate a report for IPython 2.0, run:
5
6 python github_stats.py --milestone 2.0 --since-tag rel-1.0.0
3 """
7 """
4 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
5 # Imports
9 # Imports
6 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
7
11
8 from __future__ import print_function
12 from __future__ import print_function
9
13
14 import codecs
10 import json
15 import json
11 import re
16 import re
12 import sys
17 import sys
13
18
19 from argparse import ArgumentParser
14 from datetime import datetime, timedelta
20 from datetime import datetime, timedelta
15 from subprocess import check_output
21 from subprocess import check_output
16 from gh_api import get_paged_request, make_auth_header, get_pull_request, is_pull_request
22 from gh_api import (
17
23 get_paged_request, make_auth_header, get_pull_request, is_pull_request,
24 get_milestone_id, get_issues_list,
25 )
18 #-----------------------------------------------------------------------------
26 #-----------------------------------------------------------------------------
19 # Globals
27 # Globals
20 #-----------------------------------------------------------------------------
28 #-----------------------------------------------------------------------------
21
29
# strptime/strftime pattern used both for the `since=` query parameter and
# for parsing the timestamps found in API responses.
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
# Value sent as the `per_page` query parameter of paged requests.
PER_PAGE = 100
24
32
25 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
26 # Functions
34 # Functions
27 #-----------------------------------------------------------------------------
35 #-----------------------------------------------------------------------------
28
36
def get_issues(project="ipython/ipython", state="closed", pulls=False):
    """Get a list of the issues from the Github API.

    Queries the ``pulls`` endpoint of *project* when *pulls* is true and
    the ``issues`` endpoint otherwise, restricted to the given *state*,
    and returns whatever ``get_paged_request`` yields for that URL.
    """
    which = 'pulls' if pulls else 'issues'
    url = "https://api.github.com/repos/%s/%s?state=%s&per_page=%i" % (
        project, which, state, PER_PAGE)
    return get_paged_request(url, headers=make_auth_header())
34
def round_hour(dt):
    """Truncate a datetime to the start of its hour.

    Despite the name this never rounds up: minutes, seconds and
    microseconds are simply set to zero. The input is not modified;
    a new datetime is returned.
    """
    return dt.replace(minute=0, second=0, microsecond=0)
37
39
38 def _parse_datetime(s):
40 def _parse_datetime(s):
39 """Parse dates in the format returned by the Github API."""
41 """Parse dates in the format returned by the Github API."""
40 if s:
42 if s:
41 return datetime.strptime(s, ISO8601)
43 return datetime.strptime(s, ISO8601)
42 else:
44 else:
43 return datetime.fromtimestamp(0)
45 return datetime.fromtimestamp(0)
44
46
45
def issues2dict(issues):
    """Convert a list of issues to a dict, keyed by issue number.

    Each issue must have a ``'number'`` entry. If several issues share a
    number, the last one in *issues* wins.
    """
    return {issue['number']: issue for issue in issues}
52
53
def split_pulls(all_issues, project="ipython/ipython"):
    """Split a list of closed issues into non-PR Issues and Pull Requests.

    Entries for which ``is_pull_request`` is true are replaced by the
    result of ``get_pull_request(project, number, auth=True)``; all other
    entries are passed through unchanged.

    Returns a ``(issues, pulls)`` tuple of lists.
    """
    pulls = []
    issues = []
    for i in all_issues:
        if is_pull_request(i):
            pulls.append(get_pull_request(project, i['number'], auth=True))
        else:
            issues.append(i)
    return issues, pulls
64
65
65
66
66
def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
    """Get all issues closed since a particular point in time.

    *period* can either be a datetime object, or a timedelta object. In
    the latter case, it is used as a time before the present (taken from
    ``datetime.utcnow()`` and truncated to the hour).

    With ``pulls=True`` the ``pulls`` endpoint is queried and only entries
    merged after the cutoff whose base ref is ``master`` are kept.
    Otherwise the ``issues`` endpoint is queried and entries recognised by
    ``is_pull_request`` are dropped.
    """
    which = 'pulls' if pulls else 'issues'

    if isinstance(period, timedelta):
        since = round_hour(datetime.utcnow() - period)
    else:
        since = period
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (
        project, which, since.strftime(ISO8601), PER_PAGE)
    allclosed = get_paged_request(url, headers=make_auth_header())

    # The query sorts by update time, so closing time is re-checked here.
    filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > since]
    if pulls:
        filtered = [i for i in filtered if _parse_datetime(i['merged_at']) > since]
        # filter out PRs not against master (backports)
        filtered = [i for i in filtered if i['base']['ref'] == 'master']
    else:
        filtered = [i for i in filtered if not is_pull_request(i)]

    return filtered
91
91
92
92
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues sorted by *field*.

    *field* defaults to ``'closed_at'``, i.e. the closing date. Pass
    ``reverse=True`` for descending order. The input list is left
    untouched.
    """
    return sorted(issues, key=lambda i: i[field], reverse=reverse)
96
96
97
97
def report(issues, show_urls=False):
    """Summary report about a list of issues, printing number and title.

    One bullet line is printed per issue. With ``show_urls`` the number is
    wrapped in a reST role: ``ghpull`` when the entry has a ``'merged_at'``
    key, ``ghissue`` otherwise.
    """
    # Titles may contain non-ASCII characters. They are printed as text and
    # left for the output stream to encode: pre-encoding them to bytes makes
    # Python 3 print the b'...' repr, and on Python 2 makes a codecs-wrapped
    # stdout fail with UnicodeDecodeError.
    if show_urls:
        for i in issues:
            role = 'ghpull' if 'merged_at' in i else 'ghissue'
            print('* :%s:`%d`: %s' % (role, i['number'], i['title']))
    else:
        for i in issues:
            print('* %d: %s' % (i['number'], i['title']))
110
110
111 #-----------------------------------------------------------------------------
111 #-----------------------------------------------------------------------------
112 # Main script
112 # Main script
113 #-----------------------------------------------------------------------------
113 #-----------------------------------------------------------------------------
114
114
if __name__ == "__main__":
    # deal with unicode: everything printed below goes through a UTF-8 writer
    import codecs
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    # Whether to add reST urls for all issues in printout.
    show_urls = True

    parser = ArgumentParser()
    parser.add_argument('--since-tag', type=str,
        help="The git tag to use for the starting point (typically the last major release)."
    )
    parser.add_argument('--milestone', type=str,
        help="The GitHub milestone to use for filtering issues [optional]."
    )
    parser.add_argument('--days', type=int,
        help="The number of days of data to summarize (use this or --since-tag)."
    )

    opts = parser.parse_args()
    tag = opts.since_tag

    # set `since` from days or git tag
    if opts.days:
        since = datetime.utcnow() - timedelta(days=opts.days)
    else:
        if not tag:
            # default to the most recent tag reachable from HEAD
            tag = check_output(['git', 'describe', '--abbrev=0']).strip()
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        # %ai looks like "YYYY-MM-DD HH:MM:SS +ZZZZ"; split off the UTC offset
        tagday, tz = check_output(cmd).strip().rsplit(' ', 1)
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
        h = int(tz[1:3])
        m = int(tz[3:])
        td = timedelta(hours=h, minutes=m)
        # convert the tag's local commit time to UTC
        if tz[0] == '-':
            since += td
        else:
            since -= td

    since = round_hour(since)

    milestone = opts.milestone

    print("fetching GitHub stats since %s (tag: %s, milestone: %s)" % (since, tag, milestone), file=sys.stderr)
    if milestone:
        milestone_id = get_milestone_id("ipython/ipython", milestone,
                auth=True)
        issues_and_pulls = get_issues_list("ipython/ipython",
                milestone=milestone_id,
                state='closed',
                auth=True,
        )
        # `pulls` must be defined on this path too, otherwise the sorting
        # below raises NameError. The listing may contain pull requests
        # (issues_closed_since filters them with is_pull_request as well),
        # so separate the two kinds here.
        issues, pulls = split_pulls(issues_and_pulls)
    else:
        issues = issues_closed_since(since, pulls=False)
        pulls = issues_closed_since(since, pulls=True)

    # For regular reports, it's nice to show them in reverse chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = map(len, (issues, pulls))
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.

    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or contain duplicates.")
    print()
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag+'..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits = len(check_output(cmd).splitlines())

        author_cmd = ['git', 'log', '--use-mailmap', "--format=* %aN", since_tag]
        all_authors = check_output(author_cmd).decode('utf-8', 'replace').splitlines()
        # de-duplicate, then sort case-insensitively
        unique_authors = sorted(set(all_authors), key=lambda s: s.lower())
        print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
        print()
        print('\n'.join(unique_authors))
        print()

    print()
    print("We closed a total of %d issues, %d pull requests and %d regular issues;\n"
          "this is the full list (generated with the script \n"
          ":file:`tools/github_stats.py`):" % (n_total, n_pulls, n_issues))
    print()
    print('Pull Requests (%d):\n' % n_pulls)
    report(pulls, show_urls)
    print()
    print('Issues (%d):\n' % n_issues)
    report(issues, show_urls)
General Comments 0
You need to be logged in to leave comments. Login now