##// END OF EJS Templates
tweaks to GitHub stats script...
Min RK -
Show More
@@ -1,224 +1,232 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Simple tools to query github.com and gather stats about issues.
2 """Simple tools to query github.com and gather stats about issues.
3
3
4 To generate a report for IPython 2.0, run:
4 To generate a report for IPython 2.0, run:
5
5
6 python github_stats.py --milestone 2.0 --since-tag rel-1.0.0
6 python github_stats.py --milestone 2.0 --since-tag rel-1.0.0
7 """
7 """
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9 # Imports
9 # Imports
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11
11
12 from __future__ import print_function
12 from __future__ import print_function
13
13
14 import codecs
14 import codecs
15 import sys
15 import sys
16
16
17 from argparse import ArgumentParser
17 from argparse import ArgumentParser
18 from datetime import datetime, timedelta
18 from datetime import datetime, timedelta
19 from subprocess import check_output
19 from subprocess import check_output
20
20
21 from gh_api import (
21 from gh_api import (
22 get_paged_request, make_auth_header, get_pull_request, is_pull_request,
22 get_paged_request, make_auth_header, get_pull_request, is_pull_request,
23 get_milestone_id, get_issues_list, get_authors,
23 get_milestone_id, get_issues_list, get_authors,
24 )
24 )
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Globals
26 # Globals
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28
28
# strptime/strftime pattern used for every timestamp exchanged with the
# GitHub API in this script (the trailing "Z" is matched literally).
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
# Value sent as the ``per_page`` query parameter of paged requests.
PER_PAGE = 100
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Functions
33 # Functions
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
def round_hour(dt):
    """Return *dt* truncated to the start of its hour.

    Despite the name this never rounds up: minutes, seconds and
    microseconds are simply set to zero.
    """
    return dt.replace(minute=0, second=0, microsecond=0)
38
38
39 def _parse_datetime(s):
39 def _parse_datetime(s):
40 """Parse dates in the format returned by the Github API."""
40 """Parse dates in the format returned by the Github API."""
41 if s:
41 if s:
42 return datetime.strptime(s, ISO8601)
42 return datetime.strptime(s, ISO8601)
43 else:
43 else:
44 return datetime.fromtimestamp(0)
44 return datetime.fromtimestamp(0)
45
45
def issues2dict(issues):
    """Convert a list of issues to a dict, keyed by issue number.

    Each issue is a mapping with a ``'number'`` entry. If two issues share
    a number, the one appearing later in *issues* wins.
    """
    return {issue['number']: issue for issue in issues}
52
52
def split_pulls(all_issues, project="ipython/ipython"):
    """Split a list of closed issues into non-PR issues and pull requests.

    Every entry that ``is_pull_request`` recognises is replaced by the
    object returned from ``get_pull_request(project, number, auth=True)``;
    all other entries are passed through untouched.

    Returns a ``(issues, pulls)`` tuple of lists, each in input order.
    """
    pulls = []
    issues = []
    for i in all_issues:
        if is_pull_request(i):
            pull = get_pull_request(project, i['number'], auth=True)
            pulls.append(pull)
        else:
            issues.append(i)
    return issues, pulls
64
64
65
65
def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
    """Get all issues (or pull requests) closed since a point in time.

    *period* can either be a datetime object, or a timedelta object. In the
    latter case, it is used as a time before the present (UTC), truncated
    to the hour; a datetime is used as given.

    With ``pulls=False`` the issues endpoint is queried and entries that
    are pull requests are dropped. With ``pulls=True`` the pulls endpoint
    is queried and only pull requests merged after the cut-off whose base
    branch is ``master`` are kept.
    """
    which = 'pulls' if pulls else 'issues'

    if isinstance(period, timedelta):
        since = round_hour(datetime.utcnow() - period)
    else:
        since = period

    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (
        project, which, since.strftime(ISO8601), PER_PAGE)
    allclosed = get_paged_request(url, headers=make_auth_header())

    # Re-check the close date locally instead of relying on the query alone.
    filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > since]
    if pulls:
        # closed-but-unmerged PRs have no merge date and are dropped here
        filtered = [i for i in filtered if _parse_datetime(i['merged_at']) > since]
        # filter out PRs not against master (backports)
        filtered = [i for i in filtered if i['base']['ref'] == 'master']
    else:
        filtered = [i for i in filtered if not is_pull_request(i)]

    return filtered
90
90
91
91
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues sorted by the value stored under *field*.

    By default the issues are ordered by closing date, oldest first;
    pass ``reverse=True`` for newest first.
    """
    return sorted(issues, key=lambda i: i[field], reverse=reverse)
95
95
96
96
def report(issues, show_urls=False):
    """Summary report about a list of issues, printing number and title.

    One bullet line is printed per issue. Backticks in titles are doubled
    so they do not start interpreted text in the reST output. When
    *show_urls* is true the number is wrapped in a ``:ghpull:`` role for
    entries that have a ``'merged_at'`` key and ``:ghissue:`` otherwise.
    """
    for i in issues:
        number = i['number']
        title = i['title'].replace(u'`', u'``')
        if show_urls:
            role = 'ghpull' if 'merged_at' in i else 'ghissue'
            print(u'* :%s:`%d`: %s' % (role, number, title))
        else:
            print(u'* %d: %s' % (number, title))
109
107
110 #-----------------------------------------------------------------------------
108 #-----------------------------------------------------------------------------
111 # Main script
109 # Main script
112 #-----------------------------------------------------------------------------
110 #-----------------------------------------------------------------------------
113
111
if __name__ == "__main__":
    # deal with unicode: only Python 2's stdout needs an explicit utf8 writer
    if sys.version_info < (3,):
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    # Whether to add reST urls for all issues in printout.
    show_urls = True

    parser = ArgumentParser()
    parser.add_argument('--since-tag', type=str,
        help="The git tag to use for the starting point (typically the last major release)."
    )
    parser.add_argument('--milestone', type=str,
        help="The GitHub milestone to use for filtering issues [optional]."
    )
    parser.add_argument('--days', type=int,
        help="The number of days of data to summarize (use this or --since-tag)."
    )
    parser.add_argument('--project', type=str, default="ipython/ipython",
        help="The project to summarize."
    )
    parser.add_argument('--links', action='store_true', default=False,
        help="Include links to all closed Issues and PRs in the output."
    )

    opts = parser.parse_args()
    tag = opts.since_tag

    # set `since` from days or git tag
    if opts.days:
        since = datetime.utcnow() - timedelta(days=opts.days)
    else:
        if not tag:
            # fall back to the most recent tag reachable from HEAD
            tag = check_output(['git', 'describe', '--abbrev=0']).strip().decode('utf8')
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        # %ai prints "YYYY-MM-DD HH:MM:SS +ZZZZ"; split off the UTC offset
        tagday, tz = check_output(cmd).strip().decode('utf8').rsplit(' ', 1)
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
        h = int(tz[1:3])
        m = int(tz[3:])
        td = timedelta(hours=h, minutes=m)
        # convert the tag's local time to UTC by undoing its offset
        if tz[0] == '-':
            since += td
        else:
            since -= td

    since = round_hour(since)

    milestone = opts.milestone
    project = opts.project

    print("fetching GitHub stats since %s (tag: %s, milestone: %s)" % (since, tag, milestone), file=sys.stderr)
    if milestone:
        milestone_id = get_milestone_id(project=project, milestone=milestone,
                                        auth=True)
        issues_and_pulls = get_issues_list(project=project,
                                           milestone=milestone_id,
                                           state='closed',
                                           auth=True,
                                           )
        # pass the project through so --project is honoured when the
        # individual pull requests are fetched
        issues, pulls = split_pulls(issues_and_pulls, project=project)
    else:
        issues = issues_closed_since(since, project=project, pulls=False)
        pulls = issues_closed_since(since, project=project, pulls=True)

    # For regular reports, it's nice to show them in reverse chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = len(issues), len(pulls)
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.

    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or contain duplicates.")
    print()

    ncommits = 0
    all_authors = []
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag + '..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits += len(check_output(cmd).splitlines())

        author_cmd = ['git', 'log', '--use-mailmap', "--format=* %aN", since_tag]
        all_authors.extend(check_output(author_cmd).decode('utf-8', 'replace').splitlines())

    pr_authors = []
    for pr in pulls:
        pr_authors.extend(get_authors(pr))
    ncommits = len(pr_authors) + ncommits - len(pulls)
    # `git check-mailmap` exits with an error when given no contacts, so
    # only run it when there is at least one PR author to resolve.
    if pr_authors:
        author_cmd = ['git', 'check-mailmap'] + pr_authors
        with_email = check_output(author_cmd).decode('utf-8', 'replace').splitlines()
        all_authors.extend([u'* ' + a.split(' <')[0] for a in with_email])
    unique_authors = sorted(set(all_authors), key=lambda s: s.lower())

    # argument order must match the sentence: issues first, then pull requests
    print("We closed %d issues and merged %d pull requests." % (n_issues, n_pulls))
    if milestone:
        # The trailing "__" makes this an (anonymous) reST hyperlink; without
        # it the backquoted text is not rendered as a link.
        # NOTE(review): the URL is built from the milestone name given on the
        # command line, not from milestone_id -- confirm which one GitHub
        # expects in this path.
        print("The full list can be seen `on GitHub <https://github.com/%s/milestone/%s>`__"
              % (project, milestone)
        )

    print()
    print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
    print()
    print('\n'.join(unique_authors))

    if opts.links:
        print()
        print("GitHub issues and pull requests:")
        print()
        print('Pull Requests (%d):\n' % n_pulls)
        report(pulls, show_urls)
        print()
        print('Issues (%d):\n' % n_issues)
        report(issues, show_urls)
General Comments 0
You need to be logged in to leave comments. Login now