##// END OF EJS Templates
scripts: lower case contributor emails to normalize them
Mads Kiilerich -
r8015:27d6f56a stable
parent child Browse files
Show More
@@ -1,176 +1,179 b''
1 1 #!/usr/bin/env python2
2 2 # -*- coding: utf-8 -*-
3 3
4 4 """
5 5 Kallithea script for maintaining contributor lists from version control
6 6 history.
7 7
This script and the data in it are a best effort attempt at reverse engineering
previous attributions and correlating them with version control history while
preserving all existing copyright statements and attribution. This script is
11 11 processing and summarizing information found elsewhere - it is not by itself
12 12 making any claims. Comments in the script are an attempt at reverse engineering
13 13 possible explanations - they are not showing any intent or confirming it is
14 14 correct.
15 15
These files are modified by this script (the copyright years in docs/conf.py are updated as well):
17 17
18 18 kallithea/templates/about.html claims to show copyright holders, and the GPL
19 19 license requires such existing "legal notices" to be preserved. We also try to
20 20 keep it updated with copyright holders, but do not claim it is a correct list.
21 21
22 22 CONTRIBUTORS has the purpose of giving credit where credit is due and list all
23 23 the contributor names in the source.
24 24
25 25 kallithea/templates/base/base.html contains the copyright years in the page
26 26 footer.
27 27
28 28 Both make a best effort of listing all copyright holders, but revision control
29 29 history might be a better and more definitive source.
30 30
31 31 Contributors are sorted "fairly" by copyright year and amount of
32 32 contribution.
33 33
34 34 New contributors are listed, without considering if the contribution contains
35 35 copyrightable work.
36 36
37 37 When the copyright might belong to a different legal entity than the
38 38 contributor, the legal entity is given credit too.
39 39 """
40 40
41 41 import os
42 42 import re
43 43 from collections import defaultdict
44 44
45 45 import contributor_data
46 46
47 47
def sortkey(x):
    """Return key for sorting contributors "fairly":
    * latest contribution
    * first contribution
    * number of contribution years
    * name (with some unicode normalization)
    The entries must be 2-tuples of a list of string years and the name. The
    name may be a UTF-8 encoded byte string or a unicode string.

    Entries without any years get 0 for both year fields, which sorts them after
    all entries that do have years (whose first key is a negative number)."""
    years = x[0]
    name = x[1]
    if isinstance(name, bytes):
        # Byte strings (plain str on Python 2) are assumed to be UTF-8 encoded.
        name = name.decode('utf-8')
    # Negate the latest year so that the most recent contributors sort first.
    latest = -int(years[-1]) if years else 0
    first = int(years[0]) if years else 0
    return (latest,
            first,
            -len(years),
            # Fold the accented letters so they sort like plain e / l.
            name.lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l'),
            )
60 60
61 61
def nice_years(l, dash='-', join=' '):
    """Convert a list of years into brief range like '1900-1901, 1921'.

    l: sequence of years (strings or integers), expected in ascending order
    dash: string placed between the first and last year of a range
    join: string placed between the ranges

    Consecutive years are collapsed into one 'first<dash>last' range; a year
    without neighbours is shown on its own. An empty sequence gives ''."""
    if not l:
        return ''
    years = [int(year) for year in l]
    ranges = []
    start = end = years[0]
    for year in years[1:]:
        if year == end + 1:
            # Still inside the current run of consecutive years.
            end = year
            continue
        ranges.append((start, end))
        start = end = year
    # The run that was still open when the input ended.
    ranges.append((start, end))
    return join.join(
        '%s' % first if first == last else '%s%s%s' % (first, dash, last)
        for first, last in ranges)
80 80
81 81
def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.
    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    The part of the file matched by split_re is replaced with the formatted
    entries, sorted with sortkey. Raises ValueError if split_re does not split
    the file content into exactly two parts.
    """
    name_years = defaultdict(set)

    for year, name in all_entries:
        # Ignore entries are matched against the name exactly as it was given.
        if name in no_entries or (name, year) in no_entries:
            continue
        # Lower case the email part so the same contributor is not listed twice
        # just because of different capitalization of the address.
        real_name, sep, email = name.partition(' <')
        if sep:
            name = real_name + sep + email.lower()
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Empty years are dropped; such entries are listed without years.
    entries = [(sorted(year for year in years if year), name)
               for name, years in name_years.items()]
    entries.sort(key=sortkey)

    with open(filename) as f:
        parts = re.split(split_re, f.read())
    if len(parts) != 2:
        raise ValueError('%s: expected split_re %r to split the file in 2 parts, got %s'
                         % (filename, split_re, len(parts)))
    pre, post = parts

    # Build the complete new content before opening the file for writing, so a
    # failure while formatting cannot leave the file truncated.
    content = (pre +
               ''.join(format_f(years, name) for years, name in entries) +
               post)

    with open(filename, 'w') as f:
        f.write(content)
119 122
120 123
def main():
    """Collect (year, author) entries from Mercurial history and update the
    contributor / copyright information in about.html, CONTRIBUTORS, base.html
    and docs/conf.py.

    Must be run from the root of the repository: `hg` is invoked in the current
    directory and the file names are relative. Raises
    subprocess.CalledProcessError if `hg log` fails.
    """
    import subprocess  # only needed here; keeps the file level imports unchanged

    # check_output (unlike os.popen) fails loudly if hg exits with an error,
    # instead of silently rewriting the files without any repository entries.
    # The template ends with a real newline: one "YEAR AUTHOR" line per changeset.
    log_output = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'],
        universal_newlines=True)

    # Apply name fixes, looked up by the full author string first and then by
    # the author string with the trailing '<...>' part stripped.
    repo_entries = [
        (year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
        (line.strip().split(' ', 1)
         for line in log_output.splitlines())
    ]

    # NOTE(review): the leading whitespace inside each split_re / format_f pair
    # below has to match the indentation actually used in the target file -
    # confirm against the files before relying on these literals.

    # Copyright holders: grouped by name only (the email part is stripped).
    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: <li>Copyright &copy; [^\n]*</li>\n)*',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: ' <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
    )

    # Contributor list: names are kept as they are, followed by the years.
    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
    )

    # Page footer: all names are normalized to '' so all years end up in one entry.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
    )

    #docs/conf.py:copyright = u'2010-2016 by various authors, licensed as GPLv3.'
    insert_entries(
        filename='docs/conf.py',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r"(?<=copyright = u').*(?= by various authors)",
        normalize_name=lambda name: '',
        format_f=lambda years, name: nice_years(years, '-', ', '),
    )
169 172
170 173
# Only update the files when run as a script, not when the module is imported.
if __name__ == '__main__':
    main()
173 176
174 177
175 178 # To list new contributors since last tagging:
176 179 # { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u
General Comments 0
You need to be logged in to leave comments. Login now