##// END OF EJS Templates
scripts: fix crash from comparing integer with empty list...
scripts: fix crash from comparing integer with empty list Fixed by using year 0 as default for contributors without any years.

File last commit:

r8179:d6ccf6a9 default
r8179:d6ccf6a9 default
Show More
update-copyrights.py
182 lines | 6.4 KiB | text/x-python | PythonLexer
/ scripts / update-copyrights.py
Mads Kiilerich
py3: switch to use Python 3 interpreter, temporarily leaving many things very broken until they have been migrated/fixed in a reviewable way...
r8053 #!/usr/bin/env python3
Mads Kiilerich
scripts: add update-copyrights.py...
r6681 # -*- coding: utf-8 -*-
"""
Kallithea script for maintaining contributor lists from version control
history.
This script and the data in it are a best-effort attempt at reverse engineering
previous attributions and correlating them with version control history while
preserving all existing copyright statements and attribution. This script is
processing and summarizing information found elsewhere - it is not by itself
making any claims. Comments in the script are an attempt at reverse engineering
possible explanations - they are not showing any intent or confirming it is
correct.
Four files are generated / modified by this script - the three described below,
and docs/conf.py, which has its copyright years updated too:
kallithea/templates/about.html claims to show copyright holders, and the GPL
license requires such existing "legal notices" to be preserved. We also try to
keep it updated with copyright holders, but do not claim it is a correct list.
CONTRIBUTORS has the purpose of giving credit where credit is due and listing all
the contributor names in the source.
kallithea/templates/base/base.html contains the copyright years in the page
footer.
Both make a best effort of listing all copyright holders, but revision control
history might be a better and more definitive source.
Contributors are sorted "fairly" by copyright year and amount of
contribution.
New contributors are listed, without considering if the contribution contains
copyrightable work.
When the copyright might belong to a different legal entity than the
contributor, the legal entity is given credit too.
"""
import os
import re
from collections import defaultdict
Mads Kiilerich
scripts: initial run of import cleanup using isort
r7718
Mads Kiilerich
scripts: use plain import of contributor_data...
r8177 import contributor_data
Mads Kiilerich
scripts: add update-copyrights.py...
r6681
def sortkey(x):
    """Return key for sorting contributors "fairly":

    * latest contribution
    * first contribution
    * number of contribution years
    * name (with some unicode normalization)

    The entries must be 2-tuples of a list of string years and the name.
    The list of years is expected to be sorted ascending. Contributors
    without any years are treated as if their only year was 0, which sorts
    them after everybody who has a real contribution year.
    """
    years, name = x
    if not years:
        # Keep the key a tuple of comparable ints even when there are no
        # years to convert.
        years = ['0']
    return (
        -int(years[-1]),  # primarily sort by latest contribution
        int(years[0]),  # then sort by first contribution
        -len(years),  # then sort by length of contribution (no gaps)
        # finally sort by name, folding e-acute and l-with-stroke to ASCII
        name.lower().replace('\xe9', 'e').replace('\u0142', 'l'),
    )
def nice_years(l, dash='-', join=' '):
    """Convert a list of years into brief range like '1900-1901, 1921'.

    l: iterable of years (strings or ints), expected in ascending order
    dash: string placed between the first and last year of a range
    join: string placed between the ranges

    Consecutive years are collapsed into one 'first<dash>last' range; a year
    with no neighbour is emitted on its own. An empty (or None) input gives
    an empty string.
    """
    if not l:
        return ''
    # Materialize so tuples, sets of sorted years and generators work too.
    years = [int(year) for year in l]
    if not years:
        return ''

    def fmt(first, last):
        if first == last:
            return '%s' % first
        return '%s%s%s' % (first, dash, last)

    ranges = []
    start = end = years[0]
    for year in years[1:]:
        if year == end + 1:
            # Still inside the current run of consecutive years.
            end = year
            continue
        ranges.append(fmt(start, end))
        start = end = year
    # Flush the run that was still open when the input ended.
    ranges.append(fmt(start, end))
    return join.join(ranges)
def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.

    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    The part of the file matched by split_re is replaced with the formatted
    entries, sorted with sortkey. Raises ValueError - before anything is
    written - if split_re does not match exactly one place in the file.
    """
    name_years = defaultdict(set)
    for year, name in all_entries:
        if name in no_entries or (name, year) in no_entries:
            continue
        # Lower case the email part so differently cased addresses group
        # together.
        parts = name.split(' <', 1)
        if len(parts) == 2:
            name = parts[0] + ' <' + parts[1].lower()
        # Whatever follows the first '@' (minus the closing '>') is looked up
        # as the domain that may earn an extra credit.
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Falsy years (entries credited without a year) are dropped from the
    # year list, but the name is still listed.
    entries = [(sorted(year for year in years if year), name)
               for name, years in name_years.items()]
    entries.sort(key=sortkey)

    # Contributor names contain non-ASCII characters, so do not depend on the
    # locale's default encoding; the maintained files are assumed to be UTF-8.
    with open(filename, encoding='utf-8') as f:
        content = f.read()
    pieces = re.split(split_re, content)
    if len(pieces) != 2:
        raise ValueError(
            '%s: expected exactly one place matching %r, got %s pieces when splitting'
            % (filename, split_re, len(pieces)))
    pre, post = pieces
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(pre +
                ''.join(format_f(years, name) for years, name in entries) +
                post)
def main():
    """Collect (year, author) pairs from Mercurial history and update files.

    The history of the working directory parent is read with 'hg log', author
    names are mapped through contributor_data.name_fixes (by full name, then
    by the name without the email part), and the result is written into
    kallithea/templates/about.html, CONTRIBUTORS,
    kallithea/templates/base/base.html and docs/conf.py.

    Raises subprocess.CalledProcessError if 'hg log' fails, so the files are
    never rewritten from an empty or truncated history.
    """
    import subprocess  # only needed here

    # An argument list avoids shell quoting, and check_output both checks the
    # exit status and cleans up the pipe.
    hg_log = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'],
        universal_newlines=True)
    repo_entries = [
        (year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
        (line.strip().split(' ', 1)
         for line in hg_log.split('\n') if line.strip())
    ]

    # NOTE(review): whitespace inside the patterns and format strings below is
    # reproduced as found - confirm it matches the indentation actually used
    # in the target files.
    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: <li>Copyright &copy; [^\n]+</li>\n)+',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: ' <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
    )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?: [^\n]+\n)+',
        normalize_name=lambda name: name,
        format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
    )

    # Every name is normalized to '' so all years collapse into one entry:
    # only the overall year range is written.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .+ (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
    )

    #docs/conf.py:copyright = u'2010-2016 by various authors, licensed as GPLv3.'
    insert_entries(
        filename='docs/conf.py',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r"(?<=copyright = ').+(?= by various authors)",
        normalize_name=lambda name: '',
        format_f=lambda years, name: nice_years(years, '-', ', '),
    )
Mads Kiilerich
scripts: add update-copyrights.py...
r6681
if __name__ == '__main__':
    # Only rewrite the files when executed as a script, not when imported.
    main()
# To list new contributors since last tagging:
# { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u