##// END OF EJS Templates
scripts: extract contributor data out of update-copyrights.py...
Thomas De Schampheleire -
r7495:050a94de default
parent child Browse files
Show More
@@ -0,0 +1,89 b''
1 # -*- coding: utf-8 -*-
2
3 # Some committers are so wrong that it doesn't point at any contributor:
4 total_ignore = set()
5 total_ignore.add('*** failed to import extension hggit: No module named hggit')
6 total_ignore.add('<>')
7
8 # Normalize some committer names where people have contributed under different
9 # names or email addresses:
10 name_fixes = {}
11 name_fixes['Andrew Shadura'] = "Andrej Shadura <andrew@shadura.me>"
12 name_fixes['aparkar'] = "Aparkar <aparkar@icloud.com>"
13 name_fixes['Aras Pranckevicius'] = "Aras Pranckevičius <aras@unity3d.com>"
14 name_fixes['Augosto Hermann'] = "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>"
15 name_fixes['"Bradley M. Kuhn" <bkuhn@ebb.org>'] = "Bradley M. Kuhn <bkuhn@sfconservancy.org>"
16 name_fixes['dmitri.kuznetsov'] = "Dmitri Kuznetsov"
17 name_fixes['Dmitri Kuznetsov'] = "Dmitri Kuznetsov"
18 name_fixes['domruf'] = "Dominik Ruf <dominikruf@gmail.com>"
19 name_fixes['Ingo von borstel'] = "Ingo von Borstel <kallithea@planetmaker.de>"
20 name_fixes['Jan Heylen'] = "Jan Heylen <heyleke@gmail.com>"
21 name_fixes['Jason F. Harris'] = "Jason Harris <jason@jasonfharris.com>"
22 name_fixes['Jelmer Vernooij'] = "Jelmer Vernooĳ <jelmer@samba.org>"
23 name_fixes['jfh <jason@jasonfharris.com>'] = "Jason Harris <jason@jasonfharris.com>"
24 name_fixes['Leonardo Carneiro<leonardo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
25 name_fixes['leonardo'] = "Leonardo Carneiro <leonardo@unity3d.com>"
26 name_fixes['Leonardo <leo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
27 name_fixes['Les Peabody'] = "Les Peabody <lpeabody@gmail.com>"
28 name_fixes['"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>'] = "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>"
29 name_fixes['Lukasz Balcerzak'] = "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>"
30 name_fixes['mao <mao@lins.fju.edu.tw>'] = "Ching-Chen Mao <mao@lins.fju.edu.tw>"
31 name_fixes['marcink'] = "Marcin Kuźmiński <marcin@python-works.com>"
32 name_fixes['Marcin Kuzminski'] = "Marcin Kuźmiński <marcin@python-works.com>"
33 name_fixes['nansenat16@null.tw'] = "nansenat16 <nansenat16@null.tw>"
34 name_fixes['Peter Vitt'] = "Peter Vitt <petervitt@web.de>"
35 name_fixes['philip.j@hostdime.com'] = "Philip Jameson <philip.j@hostdime.com>"
36 name_fixes['Søren Løvborg'] = "Søren Løvborg <sorenl@unity3d.com>"
37 name_fixes['Thomas De Schampheleire'] = "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>"
38 name_fixes['Weblate'] = "<>"
39 name_fixes['xpol'] = "xpol <xpolife@gmail.com>"
40 name_fixes['Lars <devel@sumpfralle.de>'] = "Lars Kruse <devel@sumpfralle.de>"
41
42 # Some committer email address domains that indicate that another entity might
43 # hold some copyright too:
44 domain_extra = {}
45 domain_extra['unity3d.com'] = "Unity Technologies"
46 domain_extra['rhodecode.com'] = "RhodeCode GmbH"
47
48 # Repository history shows some old contributions that traditionally haven't been
49 # listed in about.html - preserve that:
50 no_about = set(total_ignore)
51 # The following contributors were traditionally not listed in about.html and it
52 # seems unclear if the copyright is personal or belongs to a company.
53 no_about.add(('Thayne Harbaugh <thayne@fusionio.com>', '2011'))
54 no_about.add(('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'))
55 no_about.add(('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'))
56 no_about.add(('Vincent Caron <vcaron@bearstech.com>', '2012'))
57 # These contributors' contributions might be too small to be copyrightable:
58 no_about.add(('philip.j@hostdime.com', '2012'))
59 no_about.add(('Stefan Engel <mail@engel-stefan.de>', '2012'))
60 no_about.add(('Ton Plomp <tcplomp@gmail.com>', '2013'))
61 # Was reworked and contributed later and shadowed by other contributions:
62 no_about.add(('Sean Farley <sean.michael.farley@gmail.com>', '2013'))
63
64 # Preserve contributors listed in about.html but not appearing in repository
65 # history:
66 other_about = [
67 ("2011", "Aparkar <aparkar@icloud.com>"),
68 ("2010", "RhodeCode GmbH"),
69 ("2011", "RhodeCode GmbH"),
70 ("2012", "RhodeCode GmbH"),
71 ("2013", "RhodeCode GmbH"),
72 ]
73
74 # Preserve contributors listed in CONTRIBUTORS but not appearing in repository
75 # history:
76 other_contributors = [
77 ("", "Andrew Kesterson <andrew@aklabs.net>"),
78 ("", "cejones"),
79 ("", "David A. Sjøen <david.sjoen@westcon.no>"),
80 ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
81 ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
82 ("", "larikale"),
83 ("", "RhodeCode GmbH"),
84 ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
85 ("", "Steve Romanow <slestak989@gmail.com>"),
86 ("", "SteveCohen"),
87 ("", "Thomas <thomas@rhodecode.com>"),
88 ("", "Thomas Waldmann <tw-public@gmx.de>"),
89 ]
@@ -1,254 +1,164 b''
1 #!/usr/bin/env python2
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
2 # -*- coding: utf-8 -*-
3
3
4 """
4 """
5 Kallithea script for maintaining contributor lists from version control
5 Kallithea script for maintaining contributor lists from version control
6 history.
6 history.
7
7
8 This script and the data in it is a best effort attempt at reverse engineering
8 This script and the data in it is a best effort attempt at reverse engineering
9 previous attributions and correlate that with version control history while
9 previous attributions and correlate that with version control history while
10 preserving all existing copyright statements and attribution. This script is
10 preserving all existing copyright statements and attribution. This script is
11 processing and summarizing information found elsewhere - it is not by itself
11 processing and summarizing information found elsewhere - it is not by itself
12 making any claims. Comments in the script are an attempt at reverse engineering
12 making any claims. Comments in the script are an attempt at reverse engineering
13 possible explanations - they are not showing any intent or confirming it is
13 possible explanations - they are not showing any intent or confirming it is
14 correct.
14 correct.
15
15
16 Three files are generated / modified by this script:
16 Three files are generated / modified by this script:
17
17
18 kallithea/templates/about.html claims to show copyright holders, and the GPL
18 kallithea/templates/about.html claims to show copyright holders, and the GPL
19 license requires such existing "legal notices" to be preserved. We also try to
19 license requires such existing "legal notices" to be preserved. We also try to
20 keep it updated with copyright holders, but do not claim it is a correct list.
20 keep it updated with copyright holders, but do not claim it is a correct list.
21
21
22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
23 the contributor names in the source.
23 the contributor names in the source.
24
24
25 kallithea/templates/base/base.html contains the copyright years in the page
25 kallithea/templates/base/base.html contains the copyright years in the page
26 footer.
26 footer.
27
27
28 Both make a best effort of listing all copyright holders, but revision control
28 Both make a best effort of listing all copyright holders, but revision control
29 history might be a better and more definitive source.
29 history might be a better and more definitive source.
30
30
31 Contributors are sorted "fairly" by copyright year and amount of
31 Contributors are sorted "fairly" by copyright year and amount of
32 contribution.
32 contribution.
33
33
34 New contributors are listed, without considering if the contribution contains
34 New contributors are listed, without considering if the contribution contains
35 copyrightable work.
35 copyrightable work.
36
36
37 When the copyright might belong to a different legal entity than the
37 When the copyright might belong to a different legal entity than the
38 contributor, the legal entity is given credit too.
38 contributor, the legal entity is given credit too.
39 """
39 """
40
40
41
42 # Some committers are so wrong that it doesn't point at any contributor:
43 total_ignore = set()
44 total_ignore.add('*** failed to import extension hggit: No module named hggit')
45 total_ignore.add('<>')
46
47 # Normalize some committer names where people have contributed under different
48 # names or email addresses:
49 name_fixes = {}
50 name_fixes['Andrew Shadura'] = "Andrej Shadura <andrew@shadura.me>"
51 name_fixes['aparkar'] = "Aparkar <aparkar@icloud.com>"
52 name_fixes['Aras Pranckevicius'] = "Aras Pranckevičius <aras@unity3d.com>"
53 name_fixes['Augosto Hermann'] = "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>"
54 name_fixes['"Bradley M. Kuhn" <bkuhn@ebb.org>'] = "Bradley M. Kuhn <bkuhn@sfconservancy.org>"
55 name_fixes['dmitri.kuznetsov'] = "Dmitri Kuznetsov"
56 name_fixes['Dmitri Kuznetsov'] = "Dmitri Kuznetsov"
57 name_fixes['domruf'] = "Dominik Ruf <dominikruf@gmail.com>"
58 name_fixes['Ingo von borstel'] = "Ingo von Borstel <kallithea@planetmaker.de>"
59 name_fixes['Jan Heylen'] = "Jan Heylen <heyleke@gmail.com>"
60 name_fixes['Jason F. Harris'] = "Jason Harris <jason@jasonfharris.com>"
61 name_fixes['Jelmer Vernooij'] = "Jelmer Vernooĳ <jelmer@samba.org>"
62 name_fixes['jfh <jason@jasonfharris.com>'] = "Jason Harris <jason@jasonfharris.com>"
63 name_fixes['Leonardo Carneiro<leonardo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
64 name_fixes['leonardo'] = "Leonardo Carneiro <leonardo@unity3d.com>"
65 name_fixes['Leonardo <leo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
66 name_fixes['Les Peabody'] = "Les Peabody <lpeabody@gmail.com>"
67 name_fixes['"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>'] = "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>"
68 name_fixes['Lukasz Balcerzak'] = "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>"
69 name_fixes['mao <mao@lins.fju.edu.tw>'] = "Ching-Chen Mao <mao@lins.fju.edu.tw>"
70 name_fixes['marcink'] = "Marcin Kuźmiński <marcin@python-works.com>"
71 name_fixes['Marcin Kuzminski'] = "Marcin Kuźmiński <marcin@python-works.com>"
72 name_fixes['nansenat16@null.tw'] = "nansenat16 <nansenat16@null.tw>"
73 name_fixes['Peter Vitt'] = "Peter Vitt <petervitt@web.de>"
74 name_fixes['philip.j@hostdime.com'] = "Philip Jameson <philip.j@hostdime.com>"
75 name_fixes['Søren Løvborg'] = "Søren Løvborg <sorenl@unity3d.com>"
76 name_fixes['Thomas De Schampheleire'] = "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>"
77 name_fixes['Weblate'] = "<>"
78 name_fixes['xpol'] = "xpol <xpolife@gmail.com>"
79 name_fixes['Lars <devel@sumpfralle.de>'] = "Lars Kruse <devel@sumpfralle.de>"
80
81
82 # Some committer email address domains that indicate that another entity might
83 # hold some copyright too:
84 domain_extra = {}
85 domain_extra['unity3d.com'] = "Unity Technologies"
86 domain_extra['rhodecode.com'] = "RhodeCode GmbH"
87
88 # Repository history shows some old contributions that traditionally haven't been
89 # listed in about.html - preserve that:
90 no_about = set(total_ignore)
91 # The following contributors were traditionally not listed in about.html and it
92 # seems unclear if the copyright is personal or belongs to a company.
93 no_about.add(('Thayne Harbaugh <thayne@fusionio.com>', '2011'))
94 no_about.add(('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'))
95 no_about.add(('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'))
96 no_about.add(('Vincent Caron <vcaron@bearstech.com>', '2012'))
97 # These contributors' contributions might be too small to be copyrightable:
98 no_about.add(('philip.j@hostdime.com', '2012'))
99 no_about.add(('Stefan Engel <mail@engel-stefan.de>', '2012'))
100 no_about.add(('Ton Plomp <tcplomp@gmail.com>', '2013'))
101 # Was reworked and contributed later and shadowed by other contributions:
102 no_about.add(('Sean Farley <sean.michael.farley@gmail.com>', '2013'))
103
104 # Preserve contributors listed in about.html but not appearing in repository
105 # history:
106 other_about = [
107 ("2011", "Aparkar <aparkar@icloud.com>"),
108 ("2010", "RhodeCode GmbH"),
109 ("2011", "RhodeCode GmbH"),
110 ("2012", "RhodeCode GmbH"),
111 ("2013", "RhodeCode GmbH"),
112 ]
113
114 # Preserve contributors listed in CONTRIBUTORS but not appearing in repository
115 # history:
116 other_contributors = [
117 ("", "Andrew Kesterson <andrew@aklabs.net>"),
118 ("", "cejones"),
119 ("", "David A. Sjøen <david.sjoen@westcon.no>"),
120 ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
121 ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
122 ("", "larikale"),
123 ("", "RhodeCode GmbH"),
124 ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
125 ("", "Steve Romanow <slestak989@gmail.com>"),
126 ("", "SteveCohen"),
127 ("", "Thomas <thomas@rhodecode.com>"),
128 ("", "Thomas Waldmann <tw-public@gmx.de>"),
129 ]
130
131
132 import os
41 import os
133 import re
42 import re
134 from collections import defaultdict
43 from collections import defaultdict
44 import contributor_data
135
45
136
46
def sortkey(x):
    """Return key for sorting contributors "fairly".

    Sort criteria, in order of priority:
    * latest contribution (most recent year first)
    * first contribution (earliest year first)
    * number of contribution years (most years first)
    * name (lower-cased, with some unicode normalization)

    x: 2-tuple of a sorted list of string years and the name. The name may be
       UTF-8 encoded bytes or unicode text.

    Entries without any years sort after all entries that have years.
    """
    years = x[0]
    name = x[1]
    # Only byte strings need decoding; text is used as-is so the function works
    # both for Python 2 `str` names and for already decoded names.
    if isinstance(name, bytes):
        name = name.decode('utf-8')
    name = name.lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l')
    if not years:
        # An explicit leading flag keeps year-less entries last without relying
        # on Python 2's int-before-list cross-type ordering.
        return (1, 0, 0, 0, name)
    return (0, -int(years[-1]), int(years[0]), -len(years), name)
149
59
150
60
def nice_years(l, dash='-', join=' '):
    """Convert a list of years into brief ranges like '1900-1901 1921'.

    l: sequence of years (strings or ints), expected in ascending order
    dash: string placed between the first and last year of a consecutive run
    join: string placed between the individual runs

    Returns the empty string when there are no years.
    """
    years = [int(year) for year in l]
    if not years:
        return ''
    # Each run is a [first, last] pair of consecutive years.
    runs = [[years[0], years[0]]]
    for year in years[1:]:
        if year == runs[-1][1] + 1:
            runs[-1][1] = year
        else:
            runs.append([year, year])
    return join.join(
        ('%s' % start) if start == end else ('%s%s%s' % (start, dash, end))
        for start, end in runs)
169
79
170
80
def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.

    filename: path of the file to rewrite in place
    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite; it must have exactly
        one non-empty match in the file
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    Raises ValueError (before the file is modified) if split_re does not match
    exactly one non-empty section of the file.
    """
    name_years = defaultdict(set)

    for year, name in all_entries:
        if name in no_entries or (name, year) in no_entries:
            continue
        # Whatever follows the first '@' (minus the closing '>') is treated as
        # the email domain; names without '@' simply never match domain_extra.
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Empty years (entries without a known year) are dropped from the year list
    # but the name itself is still listed.
    entries = [(sorted(year for year in years if year), name)
               for name, years in name_years.items()]
    entries.sort(key=sortkey)

    with open(filename) as f:
        content = f.read()

    # Several callers pass patterns that can match the empty string. Selecting
    # the non-empty matches explicitly gives the same split as Python 2's
    # re.split (which skipped empty matches) and also works on Python 3.7+,
    # where re.split started splitting on empty matches.
    matches = [m for m in re.finditer(split_re, content) if m.group(0)]
    if len(matches) != 1:
        raise ValueError('expected exactly one section matching %r in %s, found %s'
                         % (split_re, filename, len(matches)))
    section = matches[0]
    pre = content[:section.start()]
    post = content[section.end():]

    with open(filename, 'w') as f:
        f.write(pre +
                ''.join(format_f(years, name) for years, name in entries) +
                post)
208
118
209
119
def _read_repo_entries():
    """Return (year, name) tuples for all ancestors of the working directory parent.

    Names are mapped through contributor_data.name_fixes, first using the full
    author string and then the part before the last '<'.

    Raises subprocess.CalledProcessError if hg exits with a non-zero status, so
    that a failing hg cannot lead to files being rewritten from empty history.
    """
    # Local import so that this block does not depend on the file-level imports.
    import subprocess

    # Argument list instead of a shell command line: no shell quoting involved.
    output = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'])
    # Python 3 returns bytes here; Python 2 `str` output is left untouched.
    if not isinstance(output, str):
        output = output.decode('utf-8')

    name_fixes = contributor_data.name_fixes
    entries = []
    for line in output.split('\n'):
        if not line:
            # Artifact of splitting after the final newline.
            continue
        year, name = line.strip().split(' ', 1)
        fixed_name = (name_fixes.get(name)
                      or name_fixes.get(name.rsplit('<', 1)[0].strip())
                      or name)
        entries.append((year, fixed_name))
    return entries


def main():
    """Update about.html, CONTRIBUTORS and base.html from repository history.

    NOTE(review): the literal whitespace inside the split_re and format_f
    strings below has to mirror the indentation used in the target files -
    confirm against those files before running.
    """
    repo_entries = _read_repo_entries()

    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: <li>Copyright &copy; [^\n]*</li>\n)*',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: ' <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
    )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
    )

    # Every name is normalized to '' so that all years end up in one single
    # entry: only the combined year range is written to the page footer.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
    )


if __name__ == '__main__':
    main()
251
161
252
162
253 # To list new contributors since last tagging:
163 # To list new contributors since last tagging:
254 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
164 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
General Comments 0
You need to be logged in to leave comments. Login now