##// END OF EJS Templates
stringutil: allow to specify initial indent level of pprint()...
Yuya Nishihara -
r40312:be57c701 default
parent child Browse files
Show More
@@ -1,759 +1,761
# stringutil.py - utility for generic string formatting, parsing, etc.
#
#  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
#  Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import ast
import codecs
import re as remod
import textwrap
import types

from ..i18n import _
from ..thirdparty import attr

from .. import (
    encoding,
    error,
    pycompat,
)

# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table: code point of each special char -> backslash-escaped
# (unicode) replacement, suitable for unicode.translate().
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}

def reescape(pat):
    """Drop-in replacement for re.escape.

    Every character listed in ``_respecial`` is prefixed with a backslash.
    The return value has the same type as ``pat``: bytes in, bytes out;
    otherwise the translated (unicode) string is returned.
    """
    # NOTE: it is intentional that this works on unicodes and not
    # bytes, as it's only possible to do the escaping with
    # unicode.translate, not bytes.translate. Sigh.
    wantuni = True
    if isinstance(pat, bytes):
        wantuni = False
        # latin1 maps each byte to the code point of the same value, so
        # the decode/encode round trip below is lossless.
        pat = pat.decode('latin1')
    pat = pat.translate(_regexescapemap)
    if wantuni:
        return pat
    return pat.encode('latin1')

def pprint(o, bprefix=False, indent=0, level=0):
    """Pretty print an object.

    Joins the atoms produced by ``pprintgen()`` into a single value.
    ``bprefix``, ``indent`` and ``level`` are forwarded unchanged; see
    ``pprintgen()`` for their meaning.
    """
    return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))

def _pprintitems(items, opener, closer, bprefix, indent, level, pairs=False):
    """Yield the atoms for one bracketed collection (helper of pprintgen).

    ``items`` is any iterable. It is walked with one element of lookahead so
    the separator is emitted only *between* elements; this also lets
    generators be rendered without knowing their length.

    ``opener`` and ``closer`` are the bracket atoms. An empty collection is
    emitted as the single atom ``opener + closer``.

    If ``pairs`` is true, each item is a ``(key, value)`` tuple rendered as
    ``key: value``.
    """
    items = iter(items)
    try:
        nextitem = next(items)
    except StopIteration:
        yield opener + closer
        return

    yield opener

    if indent:
        level += 1
        yield '\n'
        yield ' ' * (level * indent)

    last = False

    while not last:
        current = nextitem

        try:
            nextitem = next(items)
        except StopIteration:
            last = True

        if pairs:
            k, v = current
            for chunk in pprintgen(k, bprefix=bprefix, indent=indent,
                                   level=level):
                yield chunk

            yield ': '

            for chunk in pprintgen(v, bprefix=bprefix, indent=indent,
                                   level=level):
                yield chunk
        else:
            for chunk in pprintgen(current, bprefix=bprefix, indent=indent,
                                   level=level):
                yield chunk

        if not last:
            if indent:
                yield ',\n'
                yield ' ' * (level * indent)
            else:
                yield ', '

    if indent:
        # The closing bracket lines up with the enclosing level.
        level -= 1
        yield '\n'
        yield ' ' * (level * indent)

    yield closer

def pprintgen(o, bprefix=False, indent=0, level=0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts (items sorted), sets (elements sorted), tuples and
    generators are rendered recursively; any other object is rendered with
    ``pycompat.byterepr()``.
    """

    if isinstance(o, bytes):
        if bprefix:
            yield "b'%s'" % escapestr(o)
        else:
            yield "'%s'" % escapestr(o)
        return

    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield "bytearray['%s']" % escapestr(bytes(o))
        return

    # The order of the type checks below is significant and matches the
    # order in which the types have always been tested.
    if isinstance(o, list):
        items, opener, closer, pairs = o, '[', ']', False
    elif isinstance(o, dict):
        items, opener, closer, pairs = sorted(o.items()), '{', '}', True
    elif isinstance(o, set):
        items, opener, closer, pairs = sorted(o), 'set([', '])', False
    elif isinstance(o, tuple):
        items, opener, closer, pairs = o, '(', ')', False
    elif isinstance(o, types.GeneratorType):
        items, opener, closer, pairs = o, 'gen[', ']', False
    else:
        yield pycompat.byterepr(o)
        return

    for chunk in _pprintitems(items, opener, closer, bprefix, indent, level,
                              pairs=pairs):
        yield chunk

def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The repr of ``o`` is split in front of each ``<`` (or in front of the
    `` field=`` label that directly precedes it), and every resulting piece
    is put on its own line, indented by the number of ``<`` still open at
    the point where the piece starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        # ~~~~~~~~~~~~~~~~
        # p0 p1 q0 q1
        #
        # rs[p0:q0] is the piece emitted by this iteration; q1 is the
        # position of the next '<' after p1 (or the end of the string).
        q0 = -1
        q1 = rs.find('<', p1 + 1)
        if q1 < 0:
            q1 = len(rs)
        elif q1 > p1 + 1 and rs.startswith('=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = '<' opened minus '>' closed before this piece
        l = rs.count('<', 0, p0) - rs.count('>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    # NOTE(review): this text was recovered from a rendering that collapsed
    # runs of whitespace; the per-level indent literal below may originally
    # have been wider than one space - confirm against the repository.
    return '\n'.join(' ' * l + s for l, s in lines)

def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty string. A tuple is treated as a format string
    followed by its arguments. Anything not covered by the table rows above
    it is rendered with ``pprint()``.
    """
    if r is None:
        return ''
    elif isinstance(r, tuple):
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
    elif isinstance(r, bytes):
        return r
    elif callable(r):
        return r()
    else:
        return pprint(r)

def binary(s):
    """return true if a string is binary data

    A string counts as binary if it contains a NUL character. Empty or
    ``None`` input is not binary.
    """
    return bool(s and '\0' in s)

def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    An invalid regular expression raises ``error.ParseError``.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        # search(), not match(): the regex may hit anywhere in the string
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    # Reached for 'literal:' patterns and for patterns without a known prefix.
    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match

def shortuser(user):
    """Return a short representation of a user name or email address.

    The input is trimmed in four successive steps: everything from the
    first ``@`` is dropped, then everything up to and including the first
    ``<``, then everything from the first space, and finally everything
    from the first ``.``.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user

def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first ``@`` is dropped, then everything up to and
    including the first ``<`` of what remains.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user

def email(author):
    '''get email of author.

    Returns the text between the first ``<`` and the first ``>``. If there
    is no ``>`` the slice runs to the end of the string; if there is no
    ``<`` it starts at the beginning.
    '''
    r = author.find('>')
    if r == -1:
        r = None
    # find() returns -1 when '<' is absent, so "+ 1" yields a start of 0
    return author[author.find('<') + 1:r]

def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    f = author.find('<')
    if f != -1:
        # "Name <addr>" form: strip surrounding spaces/quotes from the name
        # and unescape embedded quotes.
        return author[:f].strip(' "').replace('\\"', '"')
    # Bare address: use the local part, with dots turned into spaces.
    f = author.find('@')
    return author[:f].replace('.', ' ')

@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''
    # Declared hashable (hash=True) because instances are used as
    # dictionary keys by parsemailmap() and mapname().
    email = attr.ib()
    # name is optional; it defaults to None when not given
    name = attr.ib(default=None)

420 def _ismailmaplineinvalid(names, emails):
422 def _ismailmaplineinvalid(names, emails):
421 '''Returns True if the parsed names and emails
423 '''Returns True if the parsed names and emails
422 in a mailmap entry are invalid.
424 in a mailmap entry are invalid.
423
425
424 >>> # No names or emails fails
426 >>> # No names or emails fails
425 >>> names, emails = [], []
427 >>> names, emails = [], []
426 >>> _ismailmaplineinvalid(names, emails)
428 >>> _ismailmaplineinvalid(names, emails)
427 True
429 True
428 >>> # Only one email fails
430 >>> # Only one email fails
429 >>> emails = [b'email@email.com']
431 >>> emails = [b'email@email.com']
430 >>> _ismailmaplineinvalid(names, emails)
432 >>> _ismailmaplineinvalid(names, emails)
431 True
433 True
432 >>> # One email and one name passes
434 >>> # One email and one name passes
433 >>> names = [b'Test Name']
435 >>> names = [b'Test Name']
434 >>> _ismailmaplineinvalid(names, emails)
436 >>> _ismailmaplineinvalid(names, emails)
435 False
437 False
436 >>> # No names but two emails passes
438 >>> # No names but two emails passes
437 >>> names = []
439 >>> names = []
438 >>> emails = [b'proper@email.com', b'commit@email.com']
440 >>> emails = [b'proper@email.com', b'commit@email.com']
439 >>> _ismailmaplineinvalid(names, emails)
441 >>> _ismailmaplineinvalid(names, emails)
440 False
442 False
441 '''
443 '''
442 return not emails or not names and len(emails) < 2
444 return not emails or not names and len(emails) < 2
443
445
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit-side ``mailmapping`` (last email on
    the line, plus the second name if two names were given) to the proper
    ``mailmapping`` (first email and first name). ``None`` input yields an
    empty dict; comment lines and lines that fail
    ``_ismailmaplineinvalid()`` are skipped.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith('#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # name_builder holds the words in a persons name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith('#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith('<') and element.endswith('>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap

def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a dict as returned by ``parsemailmap()``. The lookup is
    tried first with both name and email of ``author``, then with the email
    alone. Name and email missing from the matched entry are taken from
    ``author``.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in
    return '%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )

# One or more non-'<' characters, a whitespace character, then
# '<local@domain>' with no nested angle brackets, anchored at both ends.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    return _correctauthorformat.match(author) is not None

def ellipsis(text, maxlength=400):
    """Trim text to at most maxlength display columns (default: 400).

    The work is delegated to encoding.trim(), with '...' passed as its
    ellipsis marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
599
601
def escapestr(s):
    """Return the byte string s with backslash escapes applied.

    A memoryview argument is first copied into a bytes object.
    """
    data = bytes(s) if isinstance(s, memoryview) else s
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, _consumed = codecs.escape_encode(data)
    return escaped
606
608
def unescapestr(s):
    """Return the byte string s with backslash escapes decoded.

    This is the inverse of escapestr().
    """
    unescaped, _consumed = codecs.escape_decode(s)
    return unescaped
609
611
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first. If that raises UnicodeEncodeError,
    str(obj) is converted with encoding.strtolocal() before being wrapped.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localized)
    return result
618
620
def uirepr(s):
    """Return a byte repr of s with doubled backslashes collapsed to one."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
622
624
623 # delay import of textwrap
625 # delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Return a display-width-aware text wrapper configured by ``kwargs``.

    The wrapper class is only defined on the first call: this function then
    rebinds the module-level name ``_MBTextWrapper`` to the class itself, so
    later calls instantiate the class directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ucstr into a (head, tail) pair of unicode strings.

            head is the longest prefix whose column width does not exceed
            space_left. It is empty when even the first character is too
            wide to fit.
            """
            l = 0
            colwidth = encoding.ucolwidth
            for i, c in enumerate(ucstr):
                l += colwidth(c)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk too wide to fit on any line.

            With break_long_words set, move as much of the chunk at the top
            of reversed_chunks as fits into cur_line. Otherwise the whole
            chunk is moved, but only when cur_line is still empty.
            """
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                chunk = reversed_chunks[-1]
                cut, res = self._cutdown(chunk, space_left)
                if not cut and not cur_line:
                    # Not even one character fits (e.g. a double-width
                    # character with a single column left) and the line is
                    # still empty. Emit one character anyway, otherwise the
                    # chunk never shrinks and _wrap_chunks() loops forever.
                    cut, res = chunk[:1], chunk[1:]
                cur_line.append(cut)
                if res:
                    reversed_chunks[-1] = res
                else:
                    # chunk fully consumed; don't leave an empty chunk behind
                    reversed_chunks.pop()
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Assemble chunks into a list of indented output lines.

            chunks is reversed in place and consumed. Raises ValueError if
            self.width is not positive.
            """
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
726
728
def wrap(line, width, initindent='', hangindent=''):
    """Wrap the byte string line to width columns and return bytes.

    initindent prefixes the first output line and hangindent the following
    ones. All three byte strings are decoded with the configured local
    encoding, wrapped as unicode by _MBTextWrapper, and the filled text is
    encoded back.
    """
    widestindent = max(len(hangindent), len(initindent))
    if width <= widestindent:
        # adjust for weird terminal size
        width = max(78, widestindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    # decode in the same order as the arguments are listed
    uline, uinit, uhang = [raw.decode(codec, errmode)
                           for raw in (line, initindent, hangindent)]

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=uinit,
                             subsequent_indent=uhang)
    filled = wrapper.fill(uline)
    return filled.encode(codec)
742
744
# Recognized spellings of boolean values, keyed by their lowercase form.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    s is lowercased before the lookup, so matching is case-insensitive.
    If s is not a valid boolean, returns None.
    """
    lowered = s.lower()
    return _booleans.get(lowered)
753
755
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    When pycompat.ispy3 is set, s is decoded as latin1 before being handed
    to ast.literal_eval(); otherwise it is passed through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now