##// END OF EJS Templates
scripts: lower case contributor emails to normalize them
Mads Kiilerich -
r8015:27d6f56a stable
parent child Browse files
Show More
@@ -1,176 +1,179 b''
1 #!/usr/bin/env python2
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
2 # -*- coding: utf-8 -*-
3
3
4 """
4 """
5 Kallithea script for maintaining contributor lists from version control
5 Kallithea script for maintaining contributor lists from version control
6 history.
6 history.
7
7
8 This script and the data in it is a best effort attempt at reverse engineering
8 This script and the data in it is a best effort attempt at reverse engineering
9 previous attributions and correlate that with version control history while
9 previous attributions and correlate that with version control history while
10 preserving all existing copyright statements and attribution. This script is
10 preserving all existing copyright statements and attribution. This script is
11 processing and summarizing information found elsewhere - it is not by itself
11 processing and summarizing information found elsewhere - it is not by itself
12 making any claims. Comments in the script are an attempt at reverse engineering
12 making any claims. Comments in the script are an attempt at reverse engineering
13 possible explanations - they are not showing any intent or confirming it is
13 possible explanations - they are not showing any intent or confirming it is
14 correct.
14 correct.
15
15
16 Three files are generated / modified by this script:
16 Three files are generated / modified by this script:
17
17
18 kallithea/templates/about.html claims to show copyright holders, and the GPL
18 kallithea/templates/about.html claims to show copyright holders, and the GPL
19 license requires such existing "legal notices" to be preserved. We also try to
19 license requires such existing "legal notices" to be preserved. We also try to
20 keep it updated with copyright holders, but do not claim it is a correct list.
20 keep it updated with copyright holders, but do not claim it is a correct list.
21
21
22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
23 the contributor names in the source.
23 the contributor names in the source.
24
24
25 kallithea/templates/base/base.html contains the copyright years in the page
25 kallithea/templates/base/base.html contains the copyright years in the page
26 footer.
26 footer.
27
27
28 Both make a best effort of listing all copyright holders, but revision control
28 Both make a best effort of listing all copyright holders, but revision control
29 history might be a better and more definitive source.
29 history might be a better and more definitive source.
30
30
31 Contributors are sorted "fairly" by copyright year and amount of
31 Contributors are sorted "fairly" by copyright year and amount of
32 contribution.
32 contribution.
33
33
34 New contributors are listed, without considering if the contribution contains
34 New contributors are listed, without considering if the contribution contains
35 copyrightable work.
35 copyrightable work.
36
36
37 When the copyright might belong to a different legal entity than the
37 When the copyright might belong to a different legal entity than the
38 contributor, the legal entity is given credit too.
38 contributor, the legal entity is given credit too.
39 """
39 """
40
40
41 import os
41 import os
42 import re
42 import re
43 from collections import defaultdict
43 from collections import defaultdict
44
44
45 import contributor_data
45 import contributor_data
46
46
47
47
def sortkey(x):
    """Return key for sorting contributors "fairly":
    * latest contribution
    * first contribution
    * number of contribution years
    * name (with some unicode normalization)
    The entries must be 2-tuples of a list of string years and the name.

    Names given as UTF-8 byte strings are decoded first; names that already
    are text are used as they are. Entries without any years sort after all
    entries that have years, ordered by name among themselves.
    """
    years, name = x[0], x[1]
    if isinstance(name, bytes):
        name = name.decode('utf-8')
    name = name.lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l')
    if not years:
        # Use an explicit leading flag instead of putting the empty list into
        # the key: mixing lists and ints in the same key position only orders
        # by accident on Python 2 and raises TypeError on Python 3.
        return (1, 0, 0, 0, name)
    return (0, -int(years[-1]), int(years[0]), -len(years), name)
60
60
61
61
def nice_years(l, dash='-', join=' '):
    """Convert years into a brief string of ranges.

    l: iterable of years (strings or ints) in the order they should be shown
    dash: separator between the first and last year of a consecutive run
    join: separator between the runs

    Runs of consecutive years are collapsed, so ['1900', '1901', '1921'] with
    join=', ' gives '1900-1901, 1921'. Empty input gives ''.
    """
    years = [int(year) for year in l or ()]
    if not years:
        return ''
    # Collect [first, last] pairs of consecutive years explicitly instead of
    # appending a sentinel year to flush the last run.
    runs = [[years[0], years[0]]]
    for year in years[1:]:
        if year == runs[-1][1] + 1:
            runs[-1][1] = year
        else:
            runs.append([year, year])
    return join.join(
        '%s' % first if first == last else '%s%s%s' % (first, dash, last)
        for first, last in runs)
80
80
81
81
def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.
    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    The file is read, the single non-empty match of split_re is replaced with
    the formatted entries, and the file is written back in place.
    Raises ValueError if split_re does not match exactly one non-empty part
    of the file.
    """
    name_years = defaultdict(set)

    for year, name in all_entries:
        if name in no_entries or (name, year) in no_entries:
            continue
        # Lower-case the email part only, so entries that differ just in the
        # case of the email address end up grouped together.
        parts = name.split(' <', 1)
        if len(parts) == 2:
            name = parts[0] + ' <' + parts[1].lower()
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Entries without a year are kept, but with an empty list of years.
    entries = [(sorted(year for year in years if year), name)
               for name, years in name_years.items()]
    entries.sort(key=sortkey)

    with open(filename) as f:
        content = f.read()

    # Locate the part to rewrite explicitly instead of unpacking re.split():
    # the patterns used may match the empty string, and since Python 3.7
    # re.split() also splits on such empty matches.
    sections = [m for m in re.finditer(split_re, content) if m.group(0)]
    if len(sections) != 1:
        raise ValueError('expected exactly one part of %s to match %r, found %s'
                         % (filename, split_re, len(sections)))
    section = sections[0]
    pre = content[:section.start()]
    post = content[section.end():]

    with open(filename, 'w') as f:
        f.write(pre +
                ''.join(format_f(years, name) for years, name in entries) +
                post)
119
122
120
123
def main():
    """Regenerate contributor and copyright information from hg history.

    Reads year and author of all ancestors of the working directory parent
    with 'hg log', applies contributor_data.name_fixes, and rewrites the
    relevant parts of kallithea/templates/about.html, CONTRIBUTORS,
    kallithea/templates/base/base.html and docs/conf.py. The paths are
    relative, so the current directory determines which files are touched.

    Raises subprocess.CalledProcessError if 'hg log' fails, so the files are
    not rewritten from an incomplete history.
    """
    import subprocess  # only needed here; kept local to the script entry point

    # Run hg with an argument list: no shell quoting is involved, and a
    # failing command raises instead of silently giving no entries.
    hg_log = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'],
        universal_newlines=True)

    repo_entries = [
        (year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
            (line.strip().split(' ', 1)
             for line in hg_log.split('\n') if line)
    ]

    # NOTE(review): the leading whitespace inside the patterns and format
    # strings below has to match the indentation used in the target files -
    # confirm against those files.
    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: <li>Copyright &copy; [^\n]*</li>\n)*',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: ' <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
    )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
    )

    # The remaining files only show the combined range of years: all names
    # are normalized to '' so everything ends up in one single entry.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
    )

    #docs/conf.py:copyright = u'2010-2016 by various authors, licensed as GPLv3.'
    insert_entries(
        filename='docs/conf.py',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r"(?<=copyright = u').*(?= by various authors)",
        normalize_name=lambda name: '',
        format_f=lambda years, name: nice_years(years, '-', ', '),
    )
169
172
170
173
# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
173
176
174
177
175 # To list new contributors since last tagging:
178 # To list new contributors since last tagging:
176 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
179 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
General Comments 0
You need to be logged in to leave comments. Login now