upstream/kallithea Commit - r8179:d6ccf6a9

1

#!/usr/bin/env python3

1

#!/usr/bin/env python3

2

# -*- coding: utf-8 -*-

2

# -*- coding: utf-8 -*-

3

4

"""

4

"""

5

Kallithea script for maintaining contributor lists from version control

5

Kallithea script for maintaining contributor lists from version control

6

history.

6

history.

7

8

This script and the data in it is a best effort attempt at reverse engineering

8

This script and the data in it is a best effort attempt at reverse engineering

9

previous attributions and correlate that with version control history while

9

previous attributions and correlate that with version control history while

10

preserving all existing copyright statements and attribution. This script is

10

preserving all existing copyright statements and attribution. This script is

11

processing and summarizing information found elsewhere - it is not by itself

11

processing and summarizing information found elsewhere - it is not by itself

12

making any claims. Comments in the script are an attempt at reverse engineering

12

making any claims. Comments in the script are an attempt at reverse engineering

13

possible explanations - they are not showing any intent or confirming it is

13

possible explanations - they are not showing any intent or confirming it is

14

correct.

14

correct.

15

16

Three files are generated / modified by this script:

16

Three files are generated / modified by this script:

17

18

kallithea/templates/about.html claims to show copyright holders, and the GPL

18

kallithea/templates/about.html claims to show copyright holders, and the GPL

19

license requires such existing "legal notices" to be preserved. We also try to

19

license requires such existing "legal notices" to be preserved. We also try to

20

keep it updated with copyright holders, but do not claim it is a correct list.

20

keep it updated with copyright holders, but do not claim it is a correct list.

21

22

CONTRIBUTORS has the purpose of giving credit where credit is due and list all

22

CONTRIBUTORS has the purpose of giving credit where credit is due and list all

23

the contributor names in the source.

23

the contributor names in the source.

24

25

kallithea/templates/base/base.html contains the copyright years in the page

25

kallithea/templates/base/base.html contains the copyright years in the page

26

footer.

26

footer.

27

28

Both make a best effort of listing all copyright holders, but revision control

28

Both make a best effort of listing all copyright holders, but revision control

29

history might be a better and more definitive source.

29

history might be a better and more definitive source.

30

31

Contributors are sorted "fairly" by copyright year and amount of

31

Contributors are sorted "fairly" by copyright year and amount of

32

contribution.

32

contribution.

33

34

New contributors are listed, without considering if the contribution contains

34

New contributors are listed, without considering if the contribution contains

35

copyrightable work.

35

copyrightable work.

36

37

When the copyright might belong to a different legal entity than the

37

When the copyright might belong to a different legal entity than the

38

contributor, the legal entity is given credit too.

38

contributor, the legal entity is given credit too.

39

"""

39

"""

40

41

import os

41

import os

42

import re

42

import re

43

from collections import defaultdict

43

from collections import defaultdict

44

45

import contributor_data

45

import contributor_data

46

47

48

def sortkey(x):

48

def sortkey(x):

49

"""Return key for sorting contributors "fairly":

49

"""Return key for sorting contributors "fairly":

50

* latest contribution

50

* latest contribution

51

* first contribution

51

* first contribution

52

* number of contribution years

52

* number of contribution years

53

* name (with some unicode normalization)

53

* name (with some unicode normalization)

54

The entries must be 2-tuples of a list of string years and the name"""

54

The entries must be 2-tuples of a list of string years and the name"""

55

return (x[0] and -int(x[0][-1]),

55

years, name = x

56

x[0] and int(x[0][0]),

56

if not years:

57

-len(x[0]),

57

years = ['0']

58

x[1].decode('utf-8').lower().replace('\xe9', 'e').replace('\u0142', 'l')

58

return (-int(years[-1]), # primarily sort by latest contribution

59

int(years[0]), # then sort by first contribution

60

-len(years), # then sort by length of contribution (no gaps)

61

name.lower().replace('\xe9', 'e').replace('\u0142', 'l') # finally sort by name

59

)

62

)

60

63

61

64

62

def nice_years(l, dash='-', join=' '):

65

def nice_years(l, dash='-', join=' '):

63

"""Convert a list of years into brief range like '1900-1901, 1921'."""

66

"""Convert a list of years into brief range like '1900-1901, 1921'."""

64

if not l:

67

if not l:

65

return ''

68

return ''

66

start = end = int(l[0])

69

start = end = int(l[0])

67

ranges = []

70

ranges = []

68

for year in l[1:] + [0]:

71

for year in l[1:] + [0]:

69

year = int(year)

72

year = int(year)

70

if year == end + 1:

73

if year == end + 1:

71

end = year

74

end = year

72

continue

75

continue

73

if start == end:

76

if start == end:

74

ranges.append('%s' % start)

77

ranges.append('%s' % start)

75

else:

78

else:

76

ranges.append('%s%s%s' % (start, dash, end))

79

ranges.append('%s%s%s' % (start, dash, end))

77

start = end = year

80

start = end = year

78

assert start == 0 and end == 0, (start, end)

81

assert start == 0 and end == 0, (start, end)

79

return join.join(ranges)

82

return join.join(ranges)

80

83

81

84

82

def insert_entries(

85

def insert_entries(

83

filename,

86

filename,

84

all_entries,

87

all_entries,

85

no_entries,

88

no_entries,

86

domain_extra,

89

domain_extra,

87

split_re,

90

split_re,

88

normalize_name,

91

normalize_name,

89

format_f):

92

format_f):

90

"""Update file with contributor information.

93

"""Update file with contributor information.

91

all_entries: list of tuples with year and name

94

all_entries: list of tuples with year and name

92

no_entries: set of names or name and year tuples to ignore

95

no_entries: set of names or name and year tuples to ignore

93

domain_extra: map domain name to extra credit name

96

domain_extra: map domain name to extra credit name

94

split_re: regexp matching the part of file to rewrite

97

split_re: regexp matching the part of file to rewrite

95

normalize_name: function to normalize names for grouping and display

98

normalize_name: function to normalize names for grouping and display

96

format_f: function formatting year list and name to a string

99

format_f: function formatting year list and name to a string

97

"""

100

"""

98

name_years = defaultdict(set)

101

name_years = defaultdict(set)

99

102

100

for year, name in all_entries:

103

for year, name in all_entries:

101

if name in no_entries or (name, year) in no_entries:

104

if name in no_entries or (name, year) in no_entries:

102

continue

105

continue

103

parts = name.split(' <', 1)

106

parts = name.split(' <', 1)

104

if len(parts) == 2:

107

if len(parts) == 2:

105

name = parts[0] + ' <' + parts[1].lower()

108

name = parts[0] + ' <' + parts[1].lower()

106

domain = name.split('@', 1)[-1].rstrip('>')

109

domain = name.split('@', 1)[-1].rstrip('>')

107

if domain in domain_extra:

110

if domain in domain_extra:

108

name_years[domain_extra[domain]].add(year)

111

name_years[domain_extra[domain]].add(year)

109

name_years[normalize_name(name)].add(year)

112

name_years[normalize_name(name)].add(year)

110

113

111

l = [(list(sorted(year for year in years if year)), name)

114

l = [(list(sorted(year for year in years if year)), name)

112

for name, years in name_years.items()]

115

for name, years in name_years.items()]

113

l.sort(key=sortkey)

116

l.sort(key=sortkey)

114

117

115

with open(filename) as f:

118

with open(filename) as f:

116

pre, post = re.split(split_re, f.read())

119

pre, post = re.split(split_re, f.read())

117

120

118

with open(filename, 'w') as f:

121

with open(filename, 'w') as f:

119

f.write(pre +

122

f.write(pre +

120

''.join(format_f(years, name) for years, name in l) +

123

''.join(format_f(years, name) for years, name in l) +

121

post)

124

post)

122

125

123

126

124

def main():

127

def main():

125

repo_entries = [

128

repo_entries = [

126

(year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)

129

(year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)

127

for year, name in

130

for year, name in

128

(line.strip().split(' ', 1)

131

(line.strip().split(' ', 1)

129

for line in os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""").readlines())

132

for line in os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""").readlines())

130

]

133

]

131

134

132

insert_entries(

135

insert_entries(

133

filename='kallithea/templates/about.html',

136

filename='kallithea/templates/about.html',

134

all_entries=repo_entries + contributor_data.other_about + contributor_data.other,

137

all_entries=repo_entries + contributor_data.other_about + contributor_data.other,

135

no_entries=contributor_data.no_about,

138

no_entries=contributor_data.no_about,

136

domain_extra=contributor_data.domain_extra,

139

domain_extra=contributor_data.domain_extra,

137

140

138

normalize_name=lambda name: name.split('<', 1)[0].strip(),

141

normalize_name=lambda name: name.split('<', 1)[0].strip(),

139

142

140

)

143

)

141

144

142

insert_entries(

145

insert_entries(

143

filename='CONTRIBUTORS',

146

filename='CONTRIBUTORS',

144

all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,

147

all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,

145

no_entries=contributor_data.total_ignore,

148

no_entries=contributor_data.total_ignore,

146

domain_extra=contributor_data.domain_extra,

149

domain_extra=contributor_data.domain_extra,

147

split_re=r'(?: [^\n]+\n)+',

150

split_re=r'(?: [^\n]+\n)+',

148

normalize_name=lambda name: name,

151

normalize_name=lambda name: name,

149

format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),

152

format_f=lambda years, name: (' %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),

150

)

153

)

151

154

152

insert_entries(

155

insert_entries(

153

filename='kallithea/templates/base/base.html',

156

filename='kallithea/templates/base/base.html',

154

all_entries=repo_entries,

157

all_entries=repo_entries,

155

no_entries=contributor_data.total_ignore,

158

no_entries=contributor_data.total_ignore,

156

domain_extra={},

159

domain_extra={},

157

160

158

normalize_name=lambda name: '',

161

normalize_name=lambda name: '',

159

format_f=lambda years, name: ' ' + nice_years(years, '–', ', ') + ' ',

162

format_f=lambda years, name: ' ' + nice_years(years, '–', ', ') + ' ',

160

)

163

)

161

164

162

165

163

insert_entries(

166

insert_entries(

164

filename='docs/conf.py',

167

filename='docs/conf.py',

165

all_entries=repo_entries,

168

all_entries=repo_entries,

166

no_entries=contributor_data.total_ignore,

169

no_entries=contributor_data.total_ignore,

167

domain_extra={},

170

domain_extra={},

168

split_re=r"(?<=copyright = ').+(?= by various authors)",

171

split_re=r"(?<=copyright = ').+(?= by various authors)",

169

normalize_name=lambda name: '',

172

normalize_name=lambda name: '',

170

format_f=lambda years, name: nice_years(years, '-', ', '),

173

format_f=lambda years, name: nice_years(years, '-', ', '),

171

)

174

)

172

175

173

176

174

if __name__ == '__main__':

177

if __name__ == '__main__':

175

main()

178

main()

176

179

177

180

178

# To list new contributors since last tagging:

181

# To list new contributors since last tagging:

179

# { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u

182

# { hg log -r '::tagged()' -T ' {author}\n {author}\n'; hg log -r '::.' -T ' {author}\n' | sort | uniq; } | sort | uniq -u

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python3
             # -*- coding: utf-8 -*-
             """
             Kallithea script for maintaining contributor lists from version control
             history.
             This script and the data in it is a best effort attempt at reverse engineering
             previous attributions and correlate that with version control history while
             preserving all existing copyright statements and attribution. This script is
             processing and summarizing information found elsewhere - it is not by itself
             making any claims. Comments in the script are an attempt at reverse engineering
             possible explanations - they are not showing any intent or confirming it is
             correct.
             Four files are generated / modified by this script (the three described below, and the copyright years in docs/conf.py):
             kallithea/templates/about.html claims to show copyright holders, and the GPL
             license requires such existing "legal notices" to be preserved. We also try to
             keep it updated with copyright holders, but do not claim it is a correct list.
             CONTRIBUTORS has the purpose of giving credit where credit is due and list all
             the contributor names in the source.
             kallithea/templates/base/base.html contains the copyright years in the page
             footer.
             Both make a best effort of listing all copyright holders, but revision control
             history might be a better and more definitive source.
             Contributors are sorted "fairly" by copyright year and amount of
             contribution.
             New contributors are listed, without considering if the contribution contains
             copyrightable work.
             When the copyright might belong to a different legal entity than the
             contributor, the legal entity is given credit too.
             """
             import os
             import re
             from collections import defaultdict
             import contributor_data
             def sortkey(x):
                 """Return key for sorting contributors "fairly":
                 * latest contribution (most recent first)
                 * first contribution (earliest first)
                 * number of contribution years (most years first)
                 * name (lower-cased, with some unicode normalization)
                 The entries must be 2-tuples of a list of string years and the name.
                 The year list is expected to be sorted ascending. An empty year list
                 is keyed as if it contained the single year '0'."""
                 years, name = x
                 if not years:
                     # No known years: substitute a placeholder so the int()
                     # conversions below work and the key stays a tuple of ints + str.
                     years = ['0']
                 return (-int(years[-1]),  # primarily sort by latest contribution
                         int(years[0]),  # then sort by first contribution
                         -len(years),  # then sort by number of contribution years
                         name.lower().replace('\xe9', 'e').replace('\u0142', 'l')  # finally sort by name
                     )
             def nice_years(l, dash='-', join=' '):
                 """Collapse a sorted list of years into a compact string of ranges.

                 Runs of consecutive years become 'first<dash>last'; isolated years are
                 written as is. The pieces are combined with `join`, so for example
                 nice_years(['1900', '1901', '1921'], join=', ') gives '1900-1901, 1921'.
                 An empty list gives ''."""
                 if not l:
                     return ''
                 spans = []
                 first = last = int(l[0])
                 # The trailing 0 is a sentinel: it never continues a run of real years,
                 # so it forces the final pending range to be flushed.
                 for raw in l[1:] + [0]:
                     current = int(raw)
                     if current == last + 1:
                         # Still inside a run of consecutive years.
                         last = current
                     else:
                         spans.append('%s' % first if first == last
                                      else '%s%s%s' % (first, dash, last))
                         first = last = current
                 # Only the sentinel may be left pending after the loop.
                 assert first == 0 and last == 0, (first, last)
                 return join.join(spans)
             def insert_entries(
                     filename,
                     all_entries,
                     no_entries,
                     domain_extra,
                     split_re,
                     normalize_name,
                     format_f):
                 """Update file with contributor information.

                 all_entries: list of tuples with year and name
                 no_entries: set of names or name and year tuples to ignore
                 domain_extra: map domain name to extra credit name
                 split_re: regexp matching the part of file to rewrite; splitting the
                     file content on it must give exactly a leading and a trailing part
                 normalize_name: function to normalize names for grouping and display
                 format_f: function formatting year list and name to a string

                 Raises ValueError if split_re does not split the file content into
                 exactly two parts. The file is left untouched in that case, and also
                 if format_f raises.
                 """
                 name_years = defaultdict(set)

                 for year, name in all_entries:
                     if name in no_entries or (name, year) in no_entries:
                         continue
                     # Lower-case everything after the first ' <' (the address part, if
                     # present) so differently cased addresses are grouped together.
                     parts = name.split(' <', 1)
                     if len(parts) == 2:
                         name = parts[0] + ' <' + parts[1].lower()
                     # Text after the first '@' without trailing '>'; the whole name
                     # when there is no '@'.
                     domain = name.split('@', 1)[-1].rstrip('>')
                     if domain in domain_extra:
                         # Credit the extra name for this domain in addition to the
                         # contributor.
                         name_years[domain_extra[domain]].add(year)
                     name_years[normalize_name(name)].add(year)

                 # Falsy years are dropped, so a name may end up with an empty year list.
                 l = [(sorted(year for year in years if year), name)
                      for name, years in name_years.items()]
                 l.sort(key=sortkey)

                 with open(filename) as f:
                     content = f.read()
                 pieces = re.split(split_re, content)
                 if len(pieces) != 2:
                     raise ValueError(
                         '%s: expected split_re %r to split the file into 2 parts, got %s'
                         % (filename, split_re, len(pieces)))
                 pre, post = pieces

                 # Render the complete new content before opening for writing: mode 'w'
                 # truncates the file, so a failure in format_f must happen first.
                 new_content = pre + ''.join(format_f(years, name) for years, name in l) + post
                 with open(filename, 'w') as f:
                     f.write(new_content)
             def main():
                 """Regenerate the contributor and copyright listings from `hg log`.

                 Collects (year, author) pairs for the revisions selected by the revset
                 '::.', applies contributor_data.name_fixes to the author names, and
                 rewrites the generated parts of kallithea/templates/about.html,
                 CONTRIBUTORS, kallithea/templates/base/base.html and docs/conf.py."""
                 hg_command = """hg log -r '::.' -T '{date(date,"%Y")} {author}\n'"""
                 repo_entries = []
                 for line in os.popen(hg_command).readlines():
                     # Each output line is "<year> <author>".
                     year, name = line.strip().split(' ', 1)
                     # Prefer a fix for the full author string, then one for the author
                     # without its trailing '<...>' part, else keep the author as is.
                     fixed_name = (contributor_data.name_fixes.get(name)
                                   or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip())
                                   or name)
                     repo_entries.append((year, fixed_name))

                 def name_without_address(name):
                     # Keep only the text before the first '<'.
                     return name.split('<', 1)[0].strip()

                 def unchanged_name(name):
                     return name

                 def no_name(name):
                     # Group every entry under one key, so only the years remain.
                     return ''

                 def format_about(years, name):
                     return '  <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name)

                 def format_contributor(years, name):
                     return ('    %s%s%s\n' % (name, ' ' if years else '', nice_years(years)))

                 def format_footer_years(years, name):
                     return ' ' + nice_years(years, '&ndash;', ', ') + ' '

                 def format_docs_years(years, name):
                     return nice_years(years, '-', ', ')

                 # Copyright holder list on the "about" page.
                 insert_entries(
                     filename='kallithea/templates/about.html',
                     all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
                     no_entries=contributor_data.no_about,
                     domain_extra=contributor_data.domain_extra,
                     split_re=r'(?:  <li>Copyright &copy; [^\n]+</li>\n)+',
                     normalize_name=name_without_address,
                     format_f=format_about,
                     )

                 # Indented name list in CONTRIBUTORS.
                 insert_entries(
                     filename='CONTRIBUTORS',
                     all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
                     no_entries=contributor_data.total_ignore,
                     domain_extra=contributor_data.domain_extra,
                     split_re=r'(?:    [^\n]+\n)+',
                     normalize_name=unchanged_name,
                     format_f=format_contributor,
                     )

                 # Year range between '&copy;' and 'by various authors' in base.html.
                 insert_entries(
                     filename='kallithea/templates/base/base.html',
                     all_entries=repo_entries,
                     no_entries=contributor_data.total_ignore,
                     domain_extra={},
                     split_re=r'(?<=&copy;) .+ (?=by various authors)',
                     normalize_name=no_name,
                     format_f=format_footer_years,
                     )

                 # Year range in the docs/conf.py line of the form
                 # copyright = '<years> by various authors...'
                 insert_entries(
                     filename='docs/conf.py',
                     all_entries=repo_entries,
                     no_entries=contributor_data.total_ignore,
                     domain_extra={},
                     split_re=r"(?<=copyright = ').+(?= by various authors)",
                     normalize_name=no_name,
                     format_f=format_docs_years,
                     )
             # Run the update only when executed as a script, not when imported.
             if __name__ == '__main__':
                 main()
             # To list new contributors since last tagging:
             # { hg log -r '::tagged()' -T '    {author}\n    {author}\n'; hg log -r '::.' -T '    {author}\n' | sort | uniq; } | sort | uniq -u