upstream/kallithea Commit - r8015:27d6f56a

1

#!/usr/bin/env python2

1

#!/usr/bin/env python2

2

# -*- coding: utf-8 -*-

2

# -*- coding: utf-8 -*-

3

4

"""

4

"""

5

Kallithea script for maintaining contributor lists from version control

5

Kallithea script for maintaining contributor lists from version control

6

history.

6

history.

7

8

This script and the data in it is a best effort attempt at reverse engineering

8

This script and the data in it is a best effort attempt at reverse engineering

9

previous attributions and correlate that with version control history while

9

previous attributions and correlate that with version control history while

10

preserving all existing copyright statements and attribution. This script is

10

preserving all existing copyright statements and attribution. This script is

11

processing and summarizing information found elsewhere - it is not by itself

11

processing and summarizing information found elsewhere - it is not by itself

12

making any claims. Comments in the script are an attempt at reverse engineering

12

making any claims. Comments in the script are an attempt at reverse engineering

13

possible explanations - they are not showing any intent or confirming it is

13

possible explanations - they are not showing any intent or confirming it is

14

correct.

14

correct.

15

16

Three files are generated / modified by this script:

16

Three files are generated / modified by this script:

17

18

kallithea/templates/about.html claims to show copyright holders, and the GPL

18

kallithea/templates/about.html claims to show copyright holders, and the GPL

19

license requires such existing "legal notices" to be preserved. We also try to

19

license requires such existing "legal notices" to be preserved. We also try to

20

keep it updated with copyright holders, but do not claim it is a correct list.

20

keep it updated with copyright holders, but do not claim it is a correct list.

21

22

CONTRIBUTORS has the purpose of giving credit where credit is due and list all

22

CONTRIBUTORS has the purpose of giving credit where credit is due and list all

23

the contributor names in the source.

23

the contributor names in the source.

24

25

kallithea/templates/base/base.html contains the copyright years in the page

25

kallithea/templates/base/base.html contains the copyright years in the page

26

footer.

26

footer.

27

28

Both make a best effort of listing all copyright holders, but revision control

28

Both make a best effort of listing all copyright holders, but revision control

29

history might be a better and more definitive source.

29

history might be a better and more definitive source.

30

31

Contributors are sorted "fairly" by copyright year and amount of

31

Contributors are sorted "fairly" by copyright year and amount of

32

contribution.

32

contribution.

33

34

New contributors are listed, without considering if the contribution contains

34

New contributors are listed, without considering if the contribution contains

35

copyrightable work.

35

copyrightable work.

36

37

When the copyright might belong to a different legal entity than the

37

When the copyright might belong to a different legal entity than the

38

contributor, the legal entity is given credit too.

38

contributor, the legal entity is given credit too.

39

"""

39

"""

40

41

import os

41

import os

42

import re

42

import re

43

from collections import defaultdict

43

from collections import defaultdict

44

45

import contributor_data

45

import contributor_data

46

47

48

def sortkey(x):

48

def sortkey(x):

49

"""Return key for sorting contributors "fairly":

49

"""Return key for sorting contributors "fairly":

50

* latest contribution

50

* latest contribution

51

* first contribution

51

* first contribution

52

* number of contribution years

52

* number of contribution years

53

* name (with some unicode normalization)

53

* name (with some unicode normalization)

54

The entries must be 2-tuples of a list of string years and the unicode name"""

54

The entries must be 2-tuples of a list of string years and the unicode name"""

55

return (x[0] and -int(x[0][-1]),

55

return (x[0] and -int(x[0][-1]),

56

x[0] and int(x[0][0]),

56

x[0] and int(x[0][0]),

57

-len(x[0]),

57

-len(x[0]),

58

x[1].decode('utf-8').lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l')

58

x[1].decode('utf-8').lower().replace(u'\xe9', u'e').replace(u'\u0142', u'l')

59

)

59

)

60

61

62

def nice_years(l, dash='-', join=' '):

62

def nice_years(l, dash='-', join=' '):

63

"""Convert a list of years into brief range like '1900-1901, 1921'."""

63

"""Convert a list of years into brief range like '1900-1901, 1921'."""

64

if not l:

64

if not l:

65

return ''

65

return ''

66

start = end = int(l[0])

66

start = end = int(l[0])

67

ranges = []

67

ranges = []

68

for year in l[1:] + [0]:

68

for year in l[1:] + [0]:

69

year = int(year)

69

year = int(year)

70

if year == end + 1:

70

if year == end + 1:

71

end = year

71

end = year

72

continue

72

continue

73

if start == end:

73

if start == end:

74

ranges.append('%s' % start)

74

ranges.append('%s' % start)

75

else:

75

else:

76

ranges.append('%s%s%s' % (start, dash, end))

76

ranges.append('%s%s%s' % (start, dash, end))

77

start = end = year

77

start = end = year

78

assert start == 0 and end == 0, (start, end)

78

assert start == 0 and end == 0, (start, end)

79

return join.join(ranges)

79

return join.join(ranges)

80

81

82

def insert_entries(

82

def insert_entries(

83

filename,

83

filename,

84

all_entries,

84

all_entries,

85

no_entries,

85

no_entries,

86

domain_extra,

86

domain_extra,

87

split_re,

87

split_re,

88

normalize_name,

88

normalize_name,

89

format_f):

89

format_f):

90

"""Update file with contributor information.

90

"""Update file with contributor information.

91

all_entries: list of tuples with year and name

91

all_entries: list of tuples with year and name

92

no_entries: set of names or name and year tuples to ignore

92

no_entries: set of names or name and year tuples to ignore

93

domain_extra: map domain name to extra credit name

93

domain_extra: map domain name to extra credit name

94

split_re: regexp matching the part of file to rewrite

94

split_re: regexp matching the part of file to rewrite

95

normalize_name: function to normalize names for grouping and display

95

normalize_name: function to normalize names for grouping and display

96

format_f: function formatting year list and name to a string

96

format_f: function formatting year list and name to a string

97

"""

97

"""

98

name_years = defaultdict(set)

98

name_years = defaultdict(set)

99

100

for year, name in all_entries:

100

for year, name in all_entries:

101

if name in no_entries or (name, year) in no_entries:

101

if name in no_entries or (name, year) in no_entries:

102

continue

102

continue

103

parts = name.split(' <', 1)

104

if len(parts) == 2:

105

name = parts[0] + ' <' + parts[1].lower()

103

domain = name.split('@', 1)[-1].rstrip('>')

106

domain = name.split('@', 1)[-1].rstrip('>')

104

if domain in domain_extra:

107

if domain in domain_extra:

105

name_years[domain_extra[domain]].add(year)

108

name_years[domain_extra[domain]].add(year)

106

name_years[normalize_name(name)].add(year)

109

name_years[normalize_name(name)].add(year)

107

110

108

l = [(list(sorted(year for year in years if year)), name)

111

l = [(list(sorted(year for year in years if year)), name)

109

for name, years in name_years.items()]

112

for name, years in name_years.items()]

110

l.sort(key=sortkey)

113

l.sort(key=sortkey)

111

114

112

with open(filename) as f:

115

with open(filename) as f:

113

pre, post = re.split(split_re, f.read())

116

pre, post = re.split(split_re, f.read())

114

117

115

with open(filename, 'w') as f:

118

with open(filename, 'w') as f:

116

f.write(pre +

119

f.write(pre +

117

''.join(format_f(years, name) for years, name in l) +

120

''.join(format_f(years, name) for years, name in l) +

118

post)

121

post)

119

122

120

123

121

def main():

124

def main():

122

repo_entries = [

125

repo_entries = [

123

(year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)

126

(year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)

124

for year, name in

127

for year, name in

125

(line.strip().split(' ', 1)

128

(line.strip().split(' ', 1)

126

for line in os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""").readlines())

129

for line in os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""").readlines())

127

]

130

]

128

131

129

insert_entries(

132

insert_entries(

130

filename='kallithea/templates/about.html',

133

filename='kallithea/templates/about.html',

131

all_entries=repo_entries + contributor_data.other_about + contributor_data.other,

134

all_entries=repo_entries + contributor_data.other_about + contributor_data.other,

132

no_entries=contributor_data.no_about,

135

no_entries=contributor_data.no_about,

133

domain_extra=contributor_data.domain_extra,

136

domain_extra=contributor_data.domain_extra,

134

137

135

normalize_name=lambda name: name.split('<', 1)[0].strip(),

138

normalize_name=lambda name: name.split('<', 1)[0].strip(),

136

139

137

)

140

)

138

141

139

insert_entries(

142

insert_entries(

140

filename='CONTRIBUTORS',

143

filename='CONTRIBUTORS',

141

all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,

144

all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,

142

no_entries=contributor_data.total_ignore,

145

no_entries=contributor_data.total_ignore,

143

domain_extra=contributor_data.domain_extra,

146

domain_extra=contributor_data.domain_extra,

144

split_re=r'(?: [^\n]*\n)*',

147

split_re=r'(?: [^\n]*\n)*',

145

normalize_name=lambda name: name,

148

normalize_name=lambda name: name,

146

format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),

149

format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),

147

)

150

)

148

151

149

insert_entries(

152

insert_entries(

150

filename='kallithea/templates/base/base.html',

153

filename='kallithea/templates/base/base.html',

151

all_entries=repo_entries,

154

all_entries=repo_entries,

152

no_entries=contributor_data.total_ignore,

155

no_entries=contributor_data.total_ignore,

153

domain_extra={},

156

domain_extra={},

154

157

155

normalize_name=lambda name: '',

158

normalize_name=lambda name: '',

156

format_f=lambda years, name: ' ' + nice_years(years, '–', ', ') + ' ',

159

format_f=lambda years, name: ' ' + nice_years(years, '–', ', ') + ' ',

157

)

160

)

158

161

159

162

160

insert_entries(

163

insert_entries(

161

filename='docs/conf.py',

164

filename='docs/conf.py',

162

all_entries=repo_entries,

165

all_entries=repo_entries,

163

no_entries=contributor_data.total_ignore,

166

no_entries=contributor_data.total_ignore,

164

domain_extra={},

167

domain_extra={},

165

split_re=r"(?<=copyright = u').*(?= by various authors)",

168

split_re=r"(?<=copyright = u').*(?= by various authors)",

166

normalize_name=lambda name: '',

169

normalize_name=lambda name: '',

167

format_f=lambda years, name: nice_years(years, '-', ', '),

170

format_f=lambda years, name: nice_years(years, '-', ', '),

168

)

171

)

169

172

170

173

171

if __name__ == '__main__':

174

if __name__ == '__main__':

172

main()

175

main()

173

176

174

177

175

# To list new contributors since last tagging:

178

# To list new contributors since last tagging:

176

# { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u

179

# { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python2
             # -*- coding: utf-8 -*-
             """
             Kallithea script for maintaining contributor lists from version control
             history.
             This script and the data in it is a best effort attempt at reverse engineering
             previous attributions and correlate that with version control history while
             preserving all existing copyright statements and attribution. This script is
             processing and summarizing information found elsewhere - it is not by itself
             making any claims. Comments in the script are an attempt at reverse engineering
             possible explanations - they are not showing any intent or confirming it is
             correct.
             Four files are generated / modified by this script; three are described
             below, and the copyright years in docs/conf.py are updated as well:
             kallithea/templates/about.html claims to show copyright holders, and the GPL
             license requires such existing "legal notices" to be preserved. We also try to
             keep it updated with copyright holders, but do not claim it is a correct list.
             CONTRIBUTORS has the purpose of giving credit where credit is due and list all
             the contributor names in the source.
             kallithea/templates/base/base.html contains the copyright years in the page
             footer.
             Both make a best effort of listing all copyright holders, but revision control
             history might be a better and more definitive source.
             Contributors are sorted "fairly" by copyright year and amount of
             contribution.
             New contributors are listed, without considering if the contribution contains
             copyrightable work.
             When the copyright might belong to a different legal entity than the
             contributor, the legal entity is given credit too.
             """
             import os
             import re
             from collections import defaultdict
             import contributor_data
             def sortkey(x):
                 """Build the key used to order contributor entries "fairly".

                 x is indexed as a 2-tuple: x[0] is a list of year strings (its first
                 and last items are taken as the earliest and latest year), and x[1]
                 is the name, which is decoded as UTF-8.

                 The resulting key orders by, in turn:
                 * latest year, most recent first
                 * earliest year, oldest first
                 * number of years, most first
                 * lower-cased name, with e-acute and l-stroke folded to 'e' and 'l'

                 When the year list is empty, the empty list itself is used for the
                 two year components.
                 """
                 years = x[0]
                 latest = years and -int(years[-1])
                 earliest = years and int(years[0])
                 span = -len(years)
                 folded = x[1].decode('utf-8').lower()
                 # Fold the few accented letters that would otherwise sort oddly.
                 for accented, plain in ((u'\xe9', u'e'), (u'\u0142', u'l')):
                     folded = folded.replace(accented, plain)
                 return (latest, earliest, span, folded)
             def nice_years(l, dash='-', join=' '):
                 """Collapse a list of years into a compact string of ranges.

                 Runs of consecutive years become 'first<dash>last'; isolated years are
                 shown alone. The pieces are glued together with `join`, so with the
                 defaults ['1900', '1901', '1921'] gives '1900-1901 1921', and with
                 join=', ' it gives '1900-1901, 1921'.

                 l: list of years (anything int() accepts), in ascending order
                 dash: separator placed between the two ends of a range
                 join: separator placed between ranges
                 Returns '' for an empty list.
                 """
                 if not l:
                     return ''
                 spans = []
                 lo = hi = int(l[0])
                 # The trailing 0 is a sentinel that flushes the final run.
                 for raw in l[1:] + [0]:
                     current = int(raw)
                     if current != hi + 1:
                         # Run ended: record it and start a new one at the current year.
                         spans.append(str(lo) if lo == hi else '%s%s%s' % (lo, dash, hi))
                         lo = hi = current
                     else:
                         hi = current
                 # Only the sentinel may be left pending once the loop is done.
                 assert (lo, hi) == (0, 0), (lo, hi)
                 return join.join(spans)
             def insert_entries(
                     filename,
                     all_entries,
                     no_entries,
                     domain_extra,
                     split_re,
                     normalize_name,
                     format_f):
                 """Update file with contributor information.

                 all_entries: list of tuples with year and name
                 no_entries: set of names or name and year tuples to ignore
                 domain_extra: map domain name to extra credit name
                 split_re: regexp matching the part of file to rewrite
                 normalize_name: function to normalize names for grouping and display
                 format_f: function formatting year list and name to a string

                 The file is read, split on split_re, and written back with the
                 matched part replaced by the formatted entries. The split must give
                 exactly two pieces (text before and after the match); otherwise the
                 unpacking raises ValueError before the file is opened for writing.
                 """
                 name_years = defaultdict(set)

                 for year, name in all_entries:
                     # The ignore list is checked against the name as given, before the
                     # e-mail part is lower-cased below.
                     if name in no_entries or (name, year) in no_entries:
                         continue
                     # Lower-case everything after ' <' (the e-mail part) so entries that
                     # differ only in address case are grouped under one name.
                     parts = name.split(' <', 1)
                     if len(parts) == 2:
                         name = parts[0] + ' <' + parts[1].lower()
                     # Text after the first '@', without trailing '>'. For a name with
                     # no '@' this is the whole name.
                     domain = name.split('@', 1)[-1].rstrip('>')
                     if domain in domain_extra:
                         # Credit the extra name for this domain in addition to the
                         # contributor.
                         name_years[domain_extra[domain]].add(year)
                     name_years[normalize_name(name)].add(year)

                 # Falsy years are dropped, so a name can end up with an empty year list.
                 l = [(sorted(year for year in years if year), name)
                      for name, years in name_years.items()]
                 l.sort(key=sortkey)

                 with open(filename) as f:
                     # NOTE(review): several callers pass patterns that can match the
                     # empty string; this relies on re.split not splitting on empty
                     # matches (the behaviour before Python 3.7) - confirm before
                     # running on a newer interpreter.
                     pre, post = re.split(split_re, f.read())

                 with open(filename, 'w') as f:
                     f.write(pre +
                             ''.join(format_f(years, name) for years, name in l) +
                             post)
             def main():
                 """Regenerate the contributor / copyright sections of the tracked files.

                 Reads "year author" lines from `hg log`, applies
                 contributor_data.name_fixes to the author names, and then lets
                 insert_entries rewrite the matching part of about.html, CONTRIBUTORS,
                 base.html and docs/conf.py.
                 """
                 hg_log = os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""")
                 fixes = contributor_data.name_fixes
                 repo_entries = []
                 for line in hg_log.readlines():
                     year, name = line.strip().split(' ', 1)
                     # Prefer a fix for the full author string, then one for the name with
                     # the '<...' part removed, and otherwise keep the author as recorded.
                     fixed = fixes.get(name) or fixes.get(name.rsplit('<', 1)[0].strip()) or name
                     repo_entries.append((year, fixed))

                 def name_without_email(name):
                     return name.split('<', 1)[0].strip()

                 def unchanged_name(name):
                     return name

                 def no_name(name):
                     # Grouping every entry under one empty name merges all years.
                     return ''

                 def about_item(years, name):
                     return '  <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name)

                 def contributors_line(years, name):
                     return ('    %s%s%s\n' % (name, ' ' if years else '', nice_years(years)))

                 def footer_years(years, name):
                     return ' ' + nice_years(years, '&ndash;', ', ') + ' '

                 def conf_years(years, name):
                     return nice_years(years, '-', ', ')

                 # List of copyright holders on the "about" page.
                 insert_entries(
                     filename='kallithea/templates/about.html',
                     all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
                     no_entries=contributor_data.no_about,
                     domain_extra=contributor_data.domain_extra,
                     split_re=r'(?:  <li>Copyright &copy; [^\n]*</li>\n)*',
                     normalize_name=name_without_email,
                     format_f=about_item,
                     )

                 # Indented list of contributors, keeping names exactly as given.
                 insert_entries(
                     filename='CONTRIBUTORS',
                     all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
                     no_entries=contributor_data.total_ignore,
                     domain_extra=contributor_data.domain_extra,
                     split_re=r'(?:    [^\n]*\n)*',
                     normalize_name=unchanged_name,
                     format_f=contributors_line,
                     )

                 # Copyright years between "&copy;" and "by various authors".
                 insert_entries(
                     filename='kallithea/templates/base/base.html',
                     all_entries=repo_entries,
                     no_entries=contributor_data.total_ignore,
                     domain_extra={},
                     split_re=r'(?<=&copy;) .* (?=by various authors)',
                     normalize_name=no_name,
                     format_f=footer_years,
                     )

                 # Example of the line being rewritten:
                 #docs/conf.py:copyright = u'2010-2016 by various authors, licensed as GPLv3.'
                 insert_entries(
                     filename='docs/conf.py',
                     all_entries=repo_entries,
                     no_entries=contributor_data.total_ignore,
                     domain_extra={},
                     split_re=r"(?<=copyright = u').*(?= by various authors)",
                     normalize_name=no_name,
                     format_f=conf_years,
                     )
             # Run the update only when executed as a script, not when imported.
             if __name__ == '__main__':
                 main()
             # To list new contributors since last tagging:
             # { hg log -r '::tagged()' -T '    {author}\n    {author}\n'; hg log -r '::.' -T '    {author}\n' | sort | uniq; } | sort | uniq -u