##// END OF EJS Templates
tweaks to GitHub stats script...
Min RK -
Show More
@@ -1,224 +1,232 b''
1 1 #!/usr/bin/env python
2 2 """Simple tools to query github.com and gather stats about issues.
3 3
4 4 To generate a report for IPython 2.0, run:
5 5
6 6 python github_stats.py --milestone 2.0 --since-tag rel-1.0.0
7 7 """
8 8 #-----------------------------------------------------------------------------
9 9 # Imports
10 10 #-----------------------------------------------------------------------------
11 11
12 12 from __future__ import print_function
13 13
14 14 import codecs
15 15 import sys
16 16
17 17 from argparse import ArgumentParser
18 18 from datetime import datetime, timedelta
19 19 from subprocess import check_output
20 20
21 21 from gh_api import (
22 22 get_paged_request, make_auth_header, get_pull_request, is_pull_request,
23 23 get_milestone_id, get_issues_list, get_authors,
24 24 )
25 25 #-----------------------------------------------------------------------------
26 26 # Globals
27 27 #-----------------------------------------------------------------------------
28 28
29 29 ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
30 30 PER_PAGE = 100
31 31
32 32 #-----------------------------------------------------------------------------
33 33 # Functions
34 34 #-----------------------------------------------------------------------------
35 35
def round_hour(dt):
    """Return a copy of ``dt`` with minutes, seconds and microseconds zeroed.

    Despite the name this truncates to the start of the hour; it never
    rounds up.
    """
    zeroed_fields = dict(minute=0, second=0, microsecond=0)
    return dt.replace(**zeroed_fields)
38 38
39 39 def _parse_datetime(s):
40 40 """Parse dates in the format returned by the Github API."""
41 41 if s:
42 42 return datetime.strptime(s, ISO8601)
43 43 else:
44 44 return datetime.fromtimestamp(0)
45 45
def issues2dict(issues):
    """Build a mapping from issue number to the issue itself.

    If several entries share a number, the last one in ``issues`` wins.
    """
    return {issue['number']: issue for issue in issues}
52 52
def split_pulls(all_issues, project="ipython/ipython"):
    """Partition closed issues into plain Issues and Pull Requests.

    Entries recognised by ``is_pull_request`` are replaced by the result of
    ``get_pull_request`` for ``project`` (an authenticated call); all other
    entries are passed through untouched.

    Returns the tuple ``(issues, pulls)``.
    """
    issues, pulls = [], []
    for item in all_issues:
        if not is_pull_request(item):
            issues.append(item)
            continue
        pulls.append(get_pull_request(project, item['number'], auth=True))
    return issues, pulls
64 64
65 65
def issues_closed_since(period=timedelta(days=365), project="ipython/ipython", pulls=False):
    """Fetch everything closed in ``project`` after a given point in time.

    ``period`` is either a datetime (used directly as the cutoff) or a
    timedelta (the cutoff is that long before the current UTC time,
    truncated to the hour).

    With ``pulls=False`` the issues endpoint is queried and entries that
    are pull requests are dropped.  With ``pulls=True`` the pulls endpoint
    is queried and only pull requests merged after the cutoff, and whose
    base ref is ``master``, are kept.
    """
    if isinstance(period, timedelta):
        since = round_hour(datetime.utcnow() - period)
    else:
        since = period

    endpoint = 'pulls' if pulls else 'issues'
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (
        project, endpoint, since.strftime(ISO8601), PER_PAGE,
    )
    candidates = get_paged_request(url, headers=make_auth_header())

    kept = []
    for item in candidates:
        # Re-check the closing date locally; the request sorts by update time.
        if _parse_datetime(item['closed_at']) <= since:
            continue
        if pulls:
            if _parse_datetime(item['merged_at']) <= since:
                continue
            # filter out PRs not against master (backports)
            if item['base']['ref'] != 'master':
                continue
        elif is_pull_request(item):
            continue
        kept.append(item)
    return kept
90 90
91 91
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues ordered by ``field``.

    The default field is the closing date.  ``reverse=True`` gives
    descending order.  The input sequence is left unmodified.
    """
    def field_value(issue):
        return issue[field]

    ordered = list(issues)
    ordered.sort(key=field_value, reverse=reverse)
    return ordered
95 95
96 96
def report(issues, show_urls=False):
    """Print one bullet line per issue, giving its number and title.

    Backticks in titles are doubled.  With ``show_urls`` set, each number
    is wrapped in a ``ghpull`` role when the entry has a ``merged_at`` key
    and in a ``ghissue`` role otherwise.
    """
    for issue in issues:
        title = issue['title'].replace(u'`', u'``')
        if show_urls:
            role = 'ghpull' if 'merged_at' in issue else 'ghissue'
            print(u'* :%s:`%d`: %s' % (role, issue['number'], title))
        else:
            print(u'* %d: %s' % (issue['number'], title))
109 107
110 108 #-----------------------------------------------------------------------------
111 109 # Main script
112 110 #-----------------------------------------------------------------------------
113 111
if __name__ == "__main__":
    # Command-line entry point: gather closed issues / merged PRs (by
    # milestone or since a point in time), combine them with local git
    # history, and print a summary suitable for release notes.

    # deal with unicode: Python 2's stdout needs an explicit utf8 writer
    if sys.version_info < (3,):
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    # Whether to add reST urls for all issues in printout.
    show_urls = True

    parser = ArgumentParser()
    parser.add_argument('--since-tag', type=str,
        help="The git tag to use for the starting point (typically the last major release)."
    )
    parser.add_argument('--milestone', type=str,
        help="The GitHub milestone to use for filtering issues [optional]."
    )
    parser.add_argument('--days', type=int,
        help="The number of days of data to summarize (use this or --since-tag)."
    )
    parser.add_argument('--project', type=str, default="ipython/ipython",
        help="The project to summarize."
    )
    parser.add_argument('--links', action='store_true', default=False,
        help="Include links to all closed Issues and PRs in the output."
    )

    opts = parser.parse_args()
    tag = opts.since_tag

    # set `since` from days or git tag
    if opts.days:
        since = datetime.utcnow() - timedelta(days=opts.days)
    else:
        if not tag:
            # fall back to the most recent tag reachable from HEAD
            tag = check_output(['git', 'describe', '--abbrev=0']).strip().decode('utf8')
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        # %ai gives "YYYY-MM-DD HH:MM:SS +HHMM"; split off the UTC offset
        tagday, tz = check_output(cmd).strip().decode('utf8').rsplit(' ', 1)
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
        h = int(tz[1:3])
        m = int(tz[3:])
        td = timedelta(hours=h, minutes=m)
        # convert the tag's local time to UTC by undoing its offset
        if tz[0] == '-':
            since += td
        else:
            since -= td

    since = round_hour(since)

    milestone = opts.milestone
    project = opts.project

    print("fetching GitHub stats since %s (tag: %s, milestone: %s)" % (since, tag, milestone), file=sys.stderr)
    if milestone:
        milestone_id = get_milestone_id(project=project, milestone=milestone,
                auth=True)
        issues_and_pulls = get_issues_list(project=project,
                milestone=milestone_id,
                state='closed',
                auth=True,
        )
        # Pass the project through so PR details are fetched from the
        # repository being summarized, not split_pulls' default.
        issues, pulls = split_pulls(issues_and_pulls, project=project)
    else:
        issues = issues_closed_since(since, project=project, pulls=False)
        pulls = issues_closed_since(since, project=project, pulls=True)

    # For regular reports, it's nice to show them in reverse chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = map(len, (issues, pulls))
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.

    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or contain duplicates.")
    print()

    ncommits = 0
    all_authors = []
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag+'..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits += len(check_output(cmd).splitlines())

        author_cmd = ['git', 'log', '--use-mailmap', "--format=* %aN", since_tag]
        all_authors.extend(check_output(author_cmd).decode('utf-8', 'replace').splitlines())

    pr_authors = []
    for pr in pulls:
        pr_authors.extend(get_authors(pr))
    ncommits = len(pr_authors) + ncommits - len(pulls)
    if pr_authors:
        # `git check-mailmap` requires at least one contact, so only call
        # it when there is something to canonicalize.
        author_cmd = ['git', 'check-mailmap'] + pr_authors
        with_email = check_output(author_cmd).decode('utf-8', 'replace').splitlines()
        all_authors.extend([ u'* ' + a.split(' <')[0] for a in with_email ])
    unique_authors = sorted(set(all_authors), key=lambda s: s.lower())

    # Argument order must follow the sentence: issues first, then pulls.
    print("We closed %d issues and merged %d pull requests." % (n_issues, n_pulls))
    if milestone:
        # The trailing `__` makes this an (anonymous) reST hyperlink; without
        # it the backquoted text is not rendered as a link.
        # NOTE(review): the URL is built from the milestone name as given on
        # the command line -- confirm that this path resolves on GitHub.
        print("The full list can be seen `on GitHub <https://github.com/%s/milestone/%s>`__"
            % (project, milestone)
        )

    print()
    print("The following %i authors contributed %i commits." % (len(unique_authors), ncommits))
    print()
    print('\n'.join(unique_authors))

    if opts.links:
        print()
        print("GitHub issues and pull requests:")
        print()
        print('Pull Requests (%d):\n' % n_pulls)
        report(pulls, show_urls)
        print()
        print('Issues (%d):\n' % n_issues)
        report(issues, show_urls)
General Comments 0
You need to be logged in to leave comments. Login now