##// END OF EJS Templates
stringutil: update list of re-special characters to include &~...
Augie Fackler -
r38496:de275ab3 default
parent child Browse files
Show More
@@ -1,560 +1,560
# stringutil.py - utility for generic string formatting, parsing, etc.
#
#  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
#  Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import ast
12 import ast
13 import codecs
13 import codecs
14 import re as remod
14 import re as remod
15 import textwrap
15 import textwrap
16
16
17 from ..i18n import _
17 from ..i18n import _
18 from ..thirdparty import attr
18 from ..thirdparty import attr
19
19
20 from .. import (
20 from .. import (
21 encoding,
21 encoding,
22 error,
22 error,
23 pycompat,
23 pycompat,
24 )
24 )
25
25
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
# '&' and '~' are part of the set as well (post-change side of the diff).
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table for unicode.translate(): maps the code point of each
# special character to that character prefixed with a backslash.
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30
30
def reescape(pat):
    """Drop-in replacement for re.escape.

    Accepts either bytes or unicode and returns the same type it was
    given, with every character listed in ``_respecial`` prefixed by a
    backslash.
    """
    # NOTE: it is intentional that this works on unicodes and not
    # bytes, as it's only possible to do the escaping with
    # unicode.translate, not bytes.translate. Sigh.
    wantuni = True
    if isinstance(pat, bytes):
        wantuni = False
        # latin1 maps each byte to the code point of the same value, so
        # the decode/encode pair below round-trips arbitrary bytes.
        pat = pat.decode('latin1')
    pat = pat.translate(_regexescapemap)
    if wantuni:
        return pat
    return pat.encode('latin1')
44
44
def pprint(o, bprefix=False):
    """Pretty print an object.

    bytes, bytearray, list, dict and tuple values are rendered by this
    function (containers recursively, dict items in sorted order); any
    other object falls back to ``pycompat.byterepr()``.

    If ``bprefix`` is true, bytes values are rendered with a leading
    ``b`` (``b'...'``) instead of plain ``'...'``.
    """
    if isinstance(o, bytes):
        if bprefix:
            return "b'%s'" % escapestr(o)
        return "'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        return '[%s]' % (b', '.join(pprint(a, bprefix=bprefix) for a in o))
    elif isinstance(o, dict):
        return '{%s}' % (b', '.join(
            '%s: %s' % (pprint(k, bprefix=bprefix),
                        pprint(v, bprefix=bprefix))
            for k, v in sorted(o.items())))
    elif isinstance(o, tuple):
        return '(%s)' % (b', '.join(pprint(a, bprefix=bprefix) for a in o))
    else:
        return pycompat.byterepr(o)
66
66
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The repr of ``o`` is split in front of every '<' (backing up to the
    start of a preceding ' field=' when the '<' directly follows '='),
    and each piece is indented according to how many '<' are still
    unclosed at the point where the piece starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    # p0: start of the piece being emitted; p1: position of the '<' that
    # opened it (search for the next '<' resumes after p1).
    p0 = p1 = 0
    while p0 < len(rs):
        # q1: position of the next '<' (or end of string)
        # q0: where the next piece starts -- either q1 itself, or the
        #     beginning of the 'field=' word immediately preceding it
        q0 = -1
        q1 = rs.find('<', p1 + 1)
        if q1 < 0:
            q1 = len(rs)
        elif q1 > p1 + 1 and rs.startswith('=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = number of '<' not yet matched by '>' before p0
        l = rs.count('<', 0, p0) - rs.count('>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    return '\n'.join('  ' * l + s for l, s in lines)
92
92
def binary(s):
    """return true if a string is binary data

    "Binary" here means the string contains a NUL character; empty or
    None input is reported as not binary.
    """
    return bool(s and '\0' in s)
96
96
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    An invalid regular expression after 're:' raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # Convert the exception explicitly: %-formatting an exception
            # object into a bytes message does not work on Python 3.
            raise error.ParseError(_('invalid regular expression: %s')
                                   % forcebytestr(e))
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    # Literal match: plain equality against the (prefix-stripped) pattern.
    match = pattern.__eq__

    if not casesensitive:
        # Lower-case the pattern once; each candidate is lowered on match.
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
155
155
def shortuser(user):
    """Return a short representation of a user name or email address.

    Successively drops everything from the first '@', everything up to
    and including the first '<', everything from the first space and
    everything from the first '.'.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user
171
171
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' is dropped, then everything up to and
    including the first '<'.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user
181
181
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'. Without a
    '<' the slice starts at the beginning; without a '>' it runs to the
    end of the string.
    '''
    r = author.find('>')
    if r == -1:
        r = None
    # find() returns -1 when there is no '<', so "+ 1" yields index 0.
    return author[author.find('<') + 1:r]
188
188
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # No email address at all: the whole field is the name.
    if '@' not in author:
        return author
    f = author.find('<')
    if f != -1:
        # "Name <email>": strip surrounding blanks/quotes and unescape \"
        return author[:f].strip(' "').replace('\\"', '"')
    # Bare address: use the local part, with dots turned into spaces.
    f = author.find('@')
    return author[:f].replace('.', ' ')
215
215
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''
    # email is required; name is optional and defaults to None
    email = attr.ib()
    name = attr.ib(default=None)
222
222
def _ismailmaplineinvalid(names, emails):
    '''Returns True if the parsed names and emails
    in a mailmap entry are invalid.

    >>> # No names or emails fails
    >>> names, emails = [], []
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # Only one email fails
    >>> emails = [b'email@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # One email and one name passes
    >>> names = [b'Test Name']
    >>> _ismailmaplineinvalid(names, emails)
    False
    >>> # No names but two emails passes
    >>> names = []
    >>> emails = [b'proper@email.com', b'commit@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    False
    '''
    # Invalid when there is no email at all, or when there is neither a
    # name nor a second email to map to.
    return not emails or (not names and len(emails) < 2)
246
246
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping a ``mailmapping`` built from the commit
    email/name (the last email on the line, plus the second name if two
    names were given) to a ``mailmapping`` holding the proper email/name
    (the first email and first name on the line). Comment lines and
    lines rejected by ``_ismailmaplineinvalid()`` are skipped. ``None``
    input yields an empty dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith('#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # name_builder holds the words in a persons name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith('#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith('<') and element.endswith('>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # Key: the commit identity (last email; commit name only when the
        # line carried both a proper name and a commit name).
        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        # Value: the proper identity (first email, first name if any).
        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap
328
328
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a dict as produced by ``parsemailmap()``. The lookup
    is tried first with both commit name and email, then with the email
    alone; fields missing from the mapped entry keep the commit's value.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in
    return '%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
375
375
# "<name without '<'> <local@domain>": at least one name character, a
# whitespace character, then an angle-bracketed address containing '@'.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    return _correctauthorformat.match(author) is not None
398
398
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to ``encoding.trim()`` with '...' as the ellipsis marker.
    """
    return encoding.trim(text, maxlength, ellipsis='...')
402
402
def escapestr(s):
    """Return bytes ``s`` with backslash escapes applied (string_escape)."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    # escape_encode() returns an (output, length) pair; keep the output.
    return codecs.escape_encode(s)[0]
407
407
def unescapestr(s):
    """Return ``s`` with backslash escapes decoded (inverse of escapestr)."""
    # escape_decode() returns an (output, length) pair; keep the output.
    return codecs.escape_decode(s)[0]
410
410
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
419
419
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
423
423
# delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Create a display-width-aware TextWrapper.

    The subclass is defined on the first call; the module-level name
    ``_MBTextWrapper`` is then rebound to the class itself, so later
    calls instantiate it directly. ``kwargs`` are passed through to the
    TextWrapper constructor.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ucstr so the head fits within space_left columns.

            Returns a (head, rest) pair; rest is '' when everything fits.
            """
            l = 0
            colwidth = encoding.ucolwidth
            for i, ch in enumerate(ucstr):
                l += colwidth(ch)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Always allow at least one column so progress is made.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class so it is only built once.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
527
527
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` columns and return the encoded result.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three strings are decoded using
    ``encoding.encoding``/``encoding.encodingmode``, wrapped with the
    width-aware wrapper, and the filled text is encoded back with
    ``encoding.encoding``.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # codec name and error mode must be native strings for decode/encode
    enc = pycompat.sysstr(encoding.encoding)
    mode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, mode)
    initindent = initindent.decode(enc, mode)
    hangindent = hangindent.decode(enc, mode)
    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
543
543
# Recognized spellings of boolean values (looked up lower-cased).
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}

def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower(), None)
554
554
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression"""
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    if pycompat.ispy3:
        # decode first so ast.literal_eval() receives a str on Python 3
        return ast.literal_eval(s.decode('latin1'))
    return ast.literal_eval(s)
General Comments 0
You need to be logged in to leave comments. Login now