##// END OF EJS Templates
scripts: extract contributor data out of update-copyrights.py...
Thomas De Schampheleire -
r7495:050a94de default
parent child Browse files
Show More
@@ -0,0 +1,89 b''
# -*- coding: utf-8 -*-

"""Hand-maintained contributor data.

Everything here is plain data: committer strings to ignore, committer name
normalizations, e-mail domains that imply an additional copyright holder, and
entries that are kept in (or kept out of) the generated contributor lists
regardless of what repository history says.
"""

# Some committer strings are so wrong that they don't point at any contributor:
total_ignore = set()
total_ignore.add('*** failed to import extension hggit: No module named hggit')
total_ignore.add('<>')

# Normalize some committer names where people have contributed under different
# names or email addresses. Keys are either the full committer string or just
# the part before the '<' of the email address:
name_fixes = {}
name_fixes['Andrew Shadura'] = "Andrej Shadura <andrew@shadura.me>"
name_fixes['aparkar'] = "Aparkar <aparkar@icloud.com>"
name_fixes['Aras Pranckevicius'] = "Aras Pranckevičius <aras@unity3d.com>"
name_fixes['Augosto Hermann'] = "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>"
name_fixes['"Bradley M. Kuhn" <bkuhn@ebb.org>'] = "Bradley M. Kuhn <bkuhn@sfconservancy.org>"
name_fixes['dmitri.kuznetsov'] = "Dmitri Kuznetsov"
name_fixes['Dmitri Kuznetsov'] = "Dmitri Kuznetsov"
name_fixes['domruf'] = "Dominik Ruf <dominikruf@gmail.com>"
name_fixes['Ingo von borstel'] = "Ingo von Borstel <kallithea@planetmaker.de>"
name_fixes['Jan Heylen'] = "Jan Heylen <heyleke@gmail.com>"
name_fixes['Jason F. Harris'] = "Jason Harris <jason@jasonfharris.com>"
name_fixes['Jelmer Vernooij'] = "Jelmer Vernooĳ <jelmer@samba.org>"
name_fixes['jfh <jason@jasonfharris.com>'] = "Jason Harris <jason@jasonfharris.com>"
name_fixes['Leonardo Carneiro<leonardo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['leonardo'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['Leonardo <leo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['Les Peabody'] = "Les Peabody <lpeabody@gmail.com>"
name_fixes['"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>'] = "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>"
name_fixes['Lukasz Balcerzak'] = "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>"
name_fixes['mao <mao@lins.fju.edu.tw>'] = "Ching-Chen Mao <mao@lins.fju.edu.tw>"
name_fixes['marcink'] = "Marcin Kuźmiński <marcin@python-works.com>"
name_fixes['Marcin Kuzminski'] = "Marcin Kuźmiński <marcin@python-works.com>"
name_fixes['nansenat16@null.tw'] = "nansenat16 <nansenat16@null.tw>"
name_fixes['Peter Vitt'] = "Peter Vitt <petervitt@web.de>"
name_fixes['philip.j@hostdime.com'] = "Philip Jameson <philip.j@hostdime.com>"
name_fixes['Søren Løvborg'] = "Søren Løvborg <sorenl@unity3d.com>"
name_fixes['Thomas De Schampheleire'] = "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>"
# Maps to '<>', which is listed in total_ignore above:
name_fixes['Weblate'] = "<>"
name_fixes['xpol'] = "xpol <xpolife@gmail.com>"
name_fixes['Lars <devel@sumpfralle.de>'] = "Lars Kruse <devel@sumpfralle.de>"

# Some committer email address domains that indicate that another entity might
# hold some copyright too:
domain_extra = {}
domain_extra['unity3d.com'] = "Unity Technologies"
domain_extra['rhodecode.com'] = "RhodeCode GmbH"

# Repository history shows some old contributions that traditionally haven't
# been listed in about.html - preserve that. The set mixes plain committer
# strings (copied from total_ignore) with (name, year) tuples:
no_about = set(total_ignore)
# The following contributors were traditionally not listed in about.html and it
# seems unclear if the copyright is personal or belongs to a company.
no_about.add(('Thayne Harbaugh <thayne@fusionio.com>', '2011'))
no_about.add(('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'))
no_about.add(('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'))
no_about.add(('Vincent Caron <vcaron@bearstech.com>', '2012'))
# These contributors' contributions might be too small to be copyrightable:
no_about.add(('philip.j@hostdime.com', '2012'))
no_about.add(('Stefan Engel <mail@engel-stefan.de>', '2012'))
no_about.add(('Ton Plomp <tcplomp@gmail.com>', '2013'))
# Was reworked and contributed later and shadowed by other contributions:
no_about.add(('Sean Farley <sean.michael.farley@gmail.com>', '2013'))

# Preserve contributors listed in about.html but not appearing in repository
# history. Entries are (year, name) tuples:
other_about = [
    ("2011", "Aparkar <aparkar@icloud.com>"),
    ("2010", "RhodeCode GmbH"),
    ("2011", "RhodeCode GmbH"),
    ("2012", "RhodeCode GmbH"),
    ("2013", "RhodeCode GmbH"),
]

# Preserve contributors listed in CONTRIBUTORS but not appearing in repository
# history. Entries are (year, name) tuples with an empty year:
other_contributors = [
    ("", "Andrew Kesterson <andrew@aklabs.net>"),
    ("", "cejones"),
    ("", "David A. Sjøen <david.sjoen@westcon.no>"),
    ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
    ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
    ("", "larikale"),
    ("", "RhodeCode GmbH"),
    ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
    ("", "Steve Romanow <slestak989@gmail.com>"),
    ("", "SteveCohen"),
    ("", "Thomas <thomas@rhodecode.com>"),
    ("", "Thomas Waldmann <tw-public@gmx.de>"),
]
@@ -1,254 +1,164 b''
1 1 #!/usr/bin/env python2
2 2 # -*- coding: utf-8 -*-
3 3
4 4 """
5 5 Kallithea script for maintaining contributor lists from version control
6 6 history.
7 7
8 8 This script and the data in it is a best effort attempt at reverse engineering
9 9 previous attributions and correlate that with version control history while
10 10 preserving all existing copyright statements and attribution. This script is
11 11 processing and summarizing information found elsewhere - it is not by itself
12 12 making any claims. Comments in the script are an attempt at reverse engineering
13 13 possible explanations - they are not showing any intent or confirming it is
14 14 correct.
15 15
16 16 Three files are generated / modified by this script:
17 17
18 18 kallithea/templates/about.html claims to show copyright holders, and the GPL
19 19 license requires such existing "legal notices" to be preserved. We also try to
20 20 keep it updated with copyright holders, but do not claim it is a correct list.
21 21
22 22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
23 23 the contributor names in the source.
24 24
25 25 kallithea/templates/base/base.html contains the copyright years in the page
26 26 footer.
27 27
28 28 Both make a best effort of listing all copyright holders, but revision control
29 29 history might be a better and more definitive source.
30 30
31 31 Contributors are sorted "fairly" by copyright year and amount of
32 32 contribution.
33 33
34 34 New contributors are listed, without considering if the contribution contains
35 35 copyrightable work.
36 36
37 37 When the copyright might belong to a different legal entity than the
38 38 contributor, the legal entity is given credit too.
39 39 """
40 40
41
42 # Some committers are so wrong that it doesn't point at any contributor:
43 total_ignore = set()
44 total_ignore.add('*** failed to import extension hggit: No module named hggit')
45 total_ignore.add('<>')
46
47 # Normalize some committer names where people have contributed under different
48 # names or email addresses:
49 name_fixes = {}
50 name_fixes['Andrew Shadura'] = "Andrej Shadura <andrew@shadura.me>"
51 name_fixes['aparkar'] = "Aparkar <aparkar@icloud.com>"
52 name_fixes['Aras Pranckevicius'] = "Aras Pranckevičius <aras@unity3d.com>"
53 name_fixes['Augosto Hermann'] = "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>"
54 name_fixes['"Bradley M. Kuhn" <bkuhn@ebb.org>'] = "Bradley M. Kuhn <bkuhn@sfconservancy.org>"
55 name_fixes['dmitri.kuznetsov'] = "Dmitri Kuznetsov"
56 name_fixes['Dmitri Kuznetsov'] = "Dmitri Kuznetsov"
57 name_fixes['domruf'] = "Dominik Ruf <dominikruf@gmail.com>"
58 name_fixes['Ingo von borstel'] = "Ingo von Borstel <kallithea@planetmaker.de>"
59 name_fixes['Jan Heylen'] = "Jan Heylen <heyleke@gmail.com>"
60 name_fixes['Jason F. Harris'] = "Jason Harris <jason@jasonfharris.com>"
61 name_fixes['Jelmer Vernooij'] = "Jelmer Vernooĳ <jelmer@samba.org>"
62 name_fixes['jfh <jason@jasonfharris.com>'] = "Jason Harris <jason@jasonfharris.com>"
63 name_fixes['Leonardo Carneiro<leonardo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
64 name_fixes['leonardo'] = "Leonardo Carneiro <leonardo@unity3d.com>"
65 name_fixes['Leonardo <leo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
66 name_fixes['Les Peabody'] = "Les Peabody <lpeabody@gmail.com>"
67 name_fixes['"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>'] = "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>"
68 name_fixes['Lukasz Balcerzak'] = "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>"
69 name_fixes['mao <mao@lins.fju.edu.tw>'] = "Ching-Chen Mao <mao@lins.fju.edu.tw>"
70 name_fixes['marcink'] = "Marcin Kuźmiński <marcin@python-works.com>"
71 name_fixes['Marcin Kuzminski'] = "Marcin Kuźmiński <marcin@python-works.com>"
72 name_fixes['nansenat16@null.tw'] = "nansenat16 <nansenat16@null.tw>"
73 name_fixes['Peter Vitt'] = "Peter Vitt <petervitt@web.de>"
74 name_fixes['philip.j@hostdime.com'] = "Philip Jameson <philip.j@hostdime.com>"
75 name_fixes['Søren Løvborg'] = "Søren Løvborg <sorenl@unity3d.com>"
76 name_fixes['Thomas De Schampheleire'] = "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>"
77 name_fixes['Weblate'] = "<>"
78 name_fixes['xpol'] = "xpol <xpolife@gmail.com>"
79 name_fixes['Lars <devel@sumpfralle.de>'] = "Lars Kruse <devel@sumpfralle.de>"
80
81
82 # Some committer email address domains that indicate that another entity might
83 # hold some copyright too:
84 domain_extra = {}
85 domain_extra['unity3d.com'] = "Unity Technologies"
86 domain_extra['rhodecode.com'] = "RhodeCode GmbH"
87
88 # Repository history show some old contributions that traditionally hasn't been
89 # listed in about.html - preserve that:
90 no_about = set(total_ignore)
91 # The following contributors were traditionally not listed in about.html and it
92 # seems unclear if the copyright is personal or belongs to a company.
93 no_about.add(('Thayne Harbaugh <thayne@fusionio.com>', '2011'))
94 no_about.add(('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'))
95 no_about.add(('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'))
96 no_about.add(('Vincent Caron <vcaron@bearstech.com>', '2012'))
97 # These contributors' contributions might be too small to be copyrightable:
98 no_about.add(('philip.j@hostdime.com', '2012'))
99 no_about.add(('Stefan Engel <mail@engel-stefan.de>', '2012'))
100 no_about.add(('Ton Plomp <tcplomp@gmail.com>', '2013'))
101 # Was reworked and contributed later and shadowed by other contributions:
102 no_about.add(('Sean Farley <sean.michael.farley@gmail.com>', '2013'))
103
104 # Preserve contributors listed in about.html but not appearing in repository
105 # history:
106 other_about = [
107 ("2011", "Aparkar <aparkar@icloud.com>"),
108 ("2010", "RhodeCode GmbH"),
109 ("2011", "RhodeCode GmbH"),
110 ("2012", "RhodeCode GmbH"),
111 ("2013", "RhodeCode GmbH"),
112 ]
113
114 # Preserve contributors listed in CONTRIBUTORS but not appearing in repository
115 # history:
116 other_contributors = [
117 ("", "Andrew Kesterson <andrew@aklabs.net>"),
118 ("", "cejones"),
119 ("", "David A. Sjøen <david.sjoen@westcon.no>"),
120 ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
121 ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
122 ("", "larikale"),
123 ("", "RhodeCode GmbH"),
124 ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
125 ("", "Steve Romanow <slestak989@gmail.com>"),
126 ("", "SteveCohen"),
127 ("", "Thomas <thomas@rhodecode.com>"),
128 ("", "Thomas Waldmann <tw-public@gmx.de>"),
129 ]
130
131
132 41 import os
133 42 import re
134 43 from collections import defaultdict
44 import contributor_data
135 45
136 46
def sortkey(x):
    """Return key for sorting contributors "fairly":
    * latest contribution (most recent first)
    * first contribution (earliest first)
    * number of contribution years (most first)
    * name (lower-cased, with some unicode normalization)

    x must be a 2-tuple of a sorted list of string years and the contributor
    name. The name may be a UTF-8 encoded byte string or unicode text.

    Entries without any years sort after all entries that have years. The
    key only contains ints and text, so keys are mutually comparable without
    relying on Python 2's ordering of mixed types.
    """
    years = x[0]
    name = x[1]
    if isinstance(name, bytes):
        # Work on characters rather than bytes so lower() and the
        # replacements below see the non-ASCII letters.
        name = name.decode('utf-8')
    if years:
        latest = -int(years[-1])
        first = int(years[0])
    else:
        # Negated real years are negative, so 0 places year-less entries last.
        latest = first = 0
    return (latest,
            first,
            -len(years),
            name.lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l')
    )
149 59
150 60
def nice_years(l, dash='-', join=' '):
    """Convert a sorted sequence of years into a brief range string.

    l: years in ascending order, as ints or strings accepted by int(); may be
       a list, a tuple or any other iterable
    dash: text placed between the first and last year of a consecutive run
    join: text placed between runs

    With the defaults, ['1900', '1901', '1921'] gives '1900-1901 1921'.
    Returns '' when there are no years.
    """
    if not l:
        return ''
    years = [int(year) for year in l]
    if not years:
        # An iterator can be truthy and still yield nothing.
        return ''

    def span(first, last):
        # A run of a single year is shown as just that year.
        if first == last:
            return '%s' % first
        return '%s%s%s' % (first, dash, last)

    ranges = []
    start = end = years[0]
    for year in years[1:]:
        if year == end + 1:
            # Still consecutive: extend the current run.
            end = year
        else:
            ranges.append(span(start, end))
            start = end = year
    # Flush the run that was still open when the input ended.
    ranges.append(span(start, end))
    return join.join(ranges)
169 79
170 80
def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.

    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    The part of the file matched by split_re is replaced with the formatted
    entries; everything before and after it is written back unchanged.

    Raises ValueError, before the file is opened for writing, unless split_re
    has exactly one non-empty match in the file.
    """
    name_years = defaultdict(set)

    for year, name in all_entries:
        if name in no_entries or (name, year) in no_entries:
            continue
        # Text after the first '@' with trailing '>' stripped; for names
        # without an address this is the whole name.
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Empty years (entries that only contribute a name) are dropped from the
    # year lists but the name itself is kept.
    entries = sorted(
        ((sorted(year for year in years if year), name)
         for name, years in name_years.items()),
        key=sortkey)

    with open(filename) as f:
        content = f.read()

    # Patterns of the form '(...)*' also match the empty string at nearly
    # every position; only a non-empty match marks the region to rewrite.
    matches = [m for m in re.finditer(split_re, content) if m.end() > m.start()]
    if len(matches) != 1:
        raise ValueError('%s: expected exactly one region matching %r, found %s'
                         % (filename, split_re, len(matches)))
    pre = content[:matches[0].start()]
    post = content[matches[0].end():]

    with open(filename, 'w') as f:
        f.write(pre +
                ''.join(format_f(years, name) for years, name in entries) +
                post)
208 118
209 119
def main():
    """Regenerate the contributor information in three files.

    Reads year and author of every ancestor of the working directory parent
    from 'hg log', applies contributor_data.name_fixes, and rewrites the
    matching region of kallithea/templates/about.html, CONTRIBUTORS and
    kallithea/templates/base/base.html, relative to the current directory.

    Raises subprocess.CalledProcessError if hg exits with an error, so the
    files are never rewritten from an empty or partial history.
    """
    # Local import: only main() runs an external command.
    import subprocess

    # The template emits one line per changeset: '<year> <author>'.
    log_output = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'],
        universal_newlines=True)

    name_fixes = contributor_data.name_fixes
    # A fix may be keyed on the full committer string or on just the part
    # before the email address; otherwise the committer string is kept.
    repo_entries = [
        (year, name_fixes.get(name) or name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
        (line.strip().split(' ', 1)
         for line in log_output.split('\n') if line.strip())
    ]

    # NOTE(review): the leading whitespace inside split_re and format_f below
    # has to agree with the indentation used in the target files - confirm
    # against the files before relying on it.
    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: <li>Copyright &copy; [^\n]*</li>\n)*',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: ' <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
    )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
    )

    # Every name is normalized to '' here, so all years collapse into one
    # entry and only the combined year range is written to the page footer.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
    )
247 157
248 158
249 159 if __name__ == '__main__':
250 160 main()
251 161
252 162
253 163 # To list new contributors since last tagging:
254 164 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
General Comments 0
You need to be logged in to leave comments. Login now