##// END OF EJS Templates
utils: accept bytearray arguments for escapestr
Joerg Sonnenberger -
r52725:4eccb65e default
parent child Browse files
Show More
@@ -1,1006 +1,1006 b''
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10
10
11 import ast
11 import ast
12 import codecs
12 import codecs
13 import re as remod
13 import re as remod
14 import textwrap
14 import textwrap
15 import types
15 import types
16 import typing
16 import typing
17
17
18 from typing import (
18 from typing import (
19 Optional,
19 Optional,
20 overload,
20 overload,
21 )
21 )
22
22
23 from ..i18n import _
23 from ..i18n import _
24 from ..thirdparty import attr
24 from ..thirdparty import attr
25
25
26 # Force pytype to use the non-vendored package
26 # Force pytype to use the non-vendored package
27 if typing.TYPE_CHECKING:
27 if typing.TYPE_CHECKING:
28 # noinspection PyPackageRequirements
28 # noinspection PyPackageRequirements
29 import attr
29 import attr
30
30
31 from .. import (
31 from .. import (
32 encoding,
32 encoding,
33 error,
33 error,
34 pycompat,
34 pycompat,
35 )
35 )
36
36
# Regex metacharacters, taken from https://bugs.python.org/issue29995
# (that change shipped with Python 3.7).
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Maps each element of _respecial to its backslash-escaped bytes form.
regexbytesescapemap = {c: (b'\\' + c) for c in _respecial}
# Same table keyed by code point with text values, i.e. the shape that
# str.translate() expects; used by reescape().
_regexescapemap = {
    ord(c): escaped.decode('latin1')
    for c, escaped in regexbytesescapemap.items()
}
42
42
43
43
@overload
def reescape(pat: bytes) -> bytes:
    ...


@overload
def reescape(pat: str) -> str:
    ...


def reescape(pat):
    """Escape regex special characters in ``pat`` (re.escape replacement).

    Returns the same kind of string that was passed in: bytes for a bytes
    pattern, otherwise whatever ``pat.translate()`` returns.
    """
    # The escaping is done with a translate() table keyed by code point,
    # which only works on text. Bytes input is therefore round-tripped
    # through latin1, which maps every byte value to one code point.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    return pat.decode('latin1').translate(_regexescapemap).encode('latin1')
67
67
68
68
def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
    """Pretty print an object into a single bytes value.

    Takes the same arguments as ``pprintgen()`` and concatenates the atoms
    it produces.
    """
    atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
    return b''.join(atoms)
72
72
73
73
def _pprintseq(items, opener, closer, bprefix, indent, level, pairs=False):
    """Yield the atoms for one bracketed, comma separated collection.

    ``items`` is any iterable of members; it is consumed lazily with a
    single element of look-ahead so that the final member can be detected
    without knowing the length (needed for generators).

    ``opener`` and ``closer`` are the bracket atoms. An empty collection is
    emitted as the single atom ``opener + closer``.

    When ``pairs`` is true every member is a ``(key, value)`` tuple and is
    rendered as ``key: value``.

    ``bprefix``, ``indent`` and ``level`` have the same meaning as in
    ``pprintgen()``.
    """
    iterator = iter(items)
    try:
        upcoming = next(iterator)
    except StopIteration:
        yield opener + closer
        return

    yield opener

    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    exhausted = False
    while not exhausted:
        current = upcoming

        # Fetch the following member before rendering the current one so
        # we know whether a separator has to follow it.
        try:
            upcoming = next(iterator)
        except StopIteration:
            exhausted = True

        if pairs:
            key, value = current
            yield from pprintgen(
                key, bprefix=bprefix, indent=indent, level=level
            )
            yield b': '
            yield from pprintgen(
                value, bprefix=bprefix, indent=indent, level=level
            )
        else:
            yield from pprintgen(
                current, bprefix=bprefix, indent=indent, level=level
            )

        if not exhausted:
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)

    yield closer


def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are rendered recursively;
    dict items and set members are emitted in sorted order. Anything else
    falls back to ``pycompat.byterepr()``.
    """

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        yield from _pprintseq(o, b'[', b']', bprefix, indent, level)
    elif isinstance(o, dict):
        yield from _pprintseq(
            sorted(o.items()), b'{', b'}', bprefix, indent, level, pairs=True
        )
    elif isinstance(o, set):
        yield from _pprintseq(sorted(o), b'set([', b'])', bprefix, indent, level)
    elif isinstance(o, tuple):
        yield from _pprintseq(o, b'(', b')', bprefix, indent, level)
    elif isinstance(o, types.GeneratorType):
        yield from _pprintseq(o, b'gen[', b']', bprefix, indent, level)
    else:
        yield pycompat.byterepr(o)
272
272
273
273
def prettyrepr(o) -> bytes:
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is split in front of each ``<`` (or in front of
    the ``field=`` word that introduces it) and every piece is placed on its
    own line, indented by the number of ``<...>`` groups still open at the
    point where the piece starts.
    """
    rs = pycompat.byterepr(o)
    end = len(rs)
    pieces = []
    start = anchor = 0
    while start < end:
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      start    anchor  cut      nextopen
        cut = -1
        nextopen = rs.find(b'<', anchor + 1)
        if nextopen < 0:
            nextopen = end
        elif nextopen > anchor + 1 and rs.startswith(b'=', nextopen - 1):
            # backtrack for ' field=<'
            cut = rs.rfind(b' ', anchor + 1, nextopen - 1)
        # No usable space found: cut right at the bracket. Otherwise cut
        # just after the space.
        cut = nextopen if cut < 0 else cut + 1
        depth = rs.count(b'<', 0, start) - rs.count(b'>', 0, start)
        assert depth >= 0
        # NOTE(review): the indent unit is reproduced as it appears in the
        # scraped source, where whitespace runs look collapsed - confirm
        # the literal against the real file.
        pieces.append(b' ' * depth + rs[start:cut].rstrip())
        start, anchor = cut, nextopen
    return b'\n'.join(pieces)
303
303
304
304
def buildrepr(r) -> bytes:
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty bytes value.
    """
    if r is None:
        return b''
    if isinstance(r, tuple):
        # First element is the format, the rest are its arguments.
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
327
327
328
328
def binary(s: bytes) -> bool:
    """Return True if ``s`` is non-empty and contains a NUL byte."""
    return bool(s) and b'\0' in s
332
332
333
333
334 def _splitpattern(pattern: bytes):
334 def _splitpattern(pattern: bytes):
335 if pattern.startswith(b're:'):
335 if pattern.startswith(b're:'):
336 return b're', pattern[3:]
336 return b're', pattern[3:]
337 elif pattern.startswith(b'literal:'):
337 elif pattern.startswith(b'literal:'):
338 return b'literal', pattern[8:]
338 return b'literal', pattern[8:]
339 return b'literal', pattern
339 return b'literal', pattern
340
340
341
341
def stringmatcher(pattern: bytes, casesensitive: bool = True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    For 're' patterns the matcher is the compiled regex's ``search``; an
    invalid expression raises ``error.ParseError``. For literal patterns
    the matcher compares the whole string for equality, after lowercasing
    both sides with ``encoding.lower`` when ``casesensitive`` is false.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            # Deliberately not the bound ``pattern.__eq__``: that returns
            # NotImplemented for operands that are not bytes, which is
            # truthy (and an error to test for truth on recent Pythons).
            # ``==`` always produces a real boolean.
            match = lambda s: pattern == s
        else:
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
401
401
402
402
def substringregexp(pattern: bytes, flags: int = 0):
    """Build a regexp object from a string pattern possibly starting with
    're:' or 'literal:' prefix.

    Literal patterns are escaped before compilation, so searching with the
    result behaves as a substring test. ``flags`` is handed to the regex
    compiler for both kinds. An invalid 're:' expression raises
    ``error.ParseError``.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     regexp = substringregexp(pattern)
    ...     return [bool(regexp.search(t)) for t in tests]
    >>> def itest(pattern, *tests):
    ...     regexp = substringregexp(pattern, remod.I)
    ...     return [bool(regexp.search(t)) for t in tests]

    substring matching (no prefix):
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
    [False, False, True]

    substring pattern should be escaped:
    >>> substringregexp(b'.bc').pattern
    '\\\\.bc'
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
    [False, False, False]

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    [False, False, True]

    force substring matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    [False, True]

    case insensitive literal matches
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
    [False, False, True]

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    [False, False, True]
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b'literal':
        return remod.compile(remod.escape(pattern), flags)
    if kind != b're':
        raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
    try:
        return remod.compile(pattern, flags)
    except remod.error as e:
        raise error.ParseError(
            _(b'invalid regular expression: %s') % forcebytestr(e)
        )
453
453
454
454
def shortuser(user: bytes) -> bytes:
    """Return a short representation of a user name or email address.

    Applied in order: drop everything from the first ``@``, drop everything
    up to and including the first ``<``, then cut at the first space and
    finally at the first dot.
    """
    user = user.partition(b'@')[0]
    _before, bracket, inside = user.partition(b'<')
    if bracket:
        user = inside
    user = user.partition(b' ')[0]
    return user.partition(b'.')[0]
470
470
471
471
def emailuser(user: bytes) -> bytes:
    """Return the user portion of an email address.

    Everything from the first ``@`` onwards is dropped, then everything up
    to and including the first ``<`` of what remains.
    """
    local = user.partition(b'@')[0]
    _before, bracket, inside = local.partition(b'<')
    return inside if bracket else local
481
481
482
482
def email(author: bytes) -> bytes:
    """Get the email of ``author``.

    Returns the text after the first ``<`` (or from the beginning when
    there is none) up to the first ``>`` (or to the end when there is none).
    """
    start = author.find(b'<') + 1
    stop = author.find(b'>')
    if stop == -1:
        return author[start:]
    return author[start:stop]
489
489
490
490
def person(author: bytes) -> bytes:
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    A value without ``@`` is returned unchanged. With a ``<``, the text in
    front of it is taken, stripped of surrounding spaces and double quotes,
    and escaped quotes are unescaped. Otherwise the part before ``@`` is
    used with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if b'@' not in author:
        return author
    bracket = author.find(b'<')
    if bracket == -1:
        # Bare address: derive a name from the local part.
        local = author[: author.find(b'@')]
        return local.replace(b'.', b' ')
    display = author[:bracket].strip(b' "')
    return display.replace(b'\\"', b'"')
517
517
518
518
@attr.s(hash=True)
class mailmapping:
    """One side (key or value) of a mailmap entry: an email address
    together with an optional name"""

    # Email address of this side of the entry.
    email = attr.ib()
    # Name that goes with the email; None when the entry gives none.
    name = attr.ib(default=None)
526
526
527
527
528 def _ismailmaplineinvalid(names, emails):
528 def _ismailmaplineinvalid(names, emails):
529 """Returns True if the parsed names and emails
529 """Returns True if the parsed names and emails
530 in a mailmap entry are invalid.
530 in a mailmap entry are invalid.
531
531
532 >>> # No names or emails fails
532 >>> # No names or emails fails
533 >>> names, emails = [], []
533 >>> names, emails = [], []
534 >>> _ismailmaplineinvalid(names, emails)
534 >>> _ismailmaplineinvalid(names, emails)
535 True
535 True
536 >>> # Only one email fails
536 >>> # Only one email fails
537 >>> emails = [b'email@email.com']
537 >>> emails = [b'email@email.com']
538 >>> _ismailmaplineinvalid(names, emails)
538 >>> _ismailmaplineinvalid(names, emails)
539 True
539 True
540 >>> # One email and one name passes
540 >>> # One email and one name passes
541 >>> names = [b'Test Name']
541 >>> names = [b'Test Name']
542 >>> _ismailmaplineinvalid(names, emails)
542 >>> _ismailmaplineinvalid(names, emails)
543 False
543 False
544 >>> # No names but two emails passes
544 >>> # No names but two emails passes
545 >>> names = []
545 >>> names = []
546 >>> emails = [b'proper@email.com', b'commit@email.com']
546 >>> emails = [b'proper@email.com', b'commit@email.com']
547 >>> _ismailmaplineinvalid(names, emails)
547 >>> _ismailmaplineinvalid(names, emails)
548 False
548 False
549 """
549 """
550 return not emails or not names and len(emails) < 2
550 return not emails or not names and len(emails) < 2
551
551
552
552
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit-side ``mailmapping`` of every valid
    line to its proper-side ``mailmapping``. ``None`` input gives an empty
    dict; comment lines and lines that fail ``_ismailmaplineinvalid`` are
    skipped.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Whole-line comments carry no entry.
        if line.lstrip().startswith(b'#'):
            continue

        # names/emails collect what has been parsed from this line so far;
        # words accumulates the pieces of the name currently being read.
        names, emails = [], []
        words = []

        for token in line.split():
            if token.startswith(b'#'):
                # Trailing comment: ignore the rest of the line.
                break

            if not (token.startswith(b'<') and token.endswith(b'>')):
                # Just another word of a name.
                words.append(token)
                continue

            # An email: record it without the "<>" and close the name that
            # preceded it, if any.
            emails.append(token[1:-1])
            if words:
                names.append(b' '.join(words))
                words = []

            # Nothing after a second email fits the .mailmap format.
            if len(emails) > 1:
                break

        # At least one email is required, and either a name or a second
        # email.
        if _ismailmaplineinvalid(names, emails):
            continue

        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None

        key = mailmapping(email=emails[-1], name=commitname)
        mailmap[key] = mailmapping(email=emails[0], name=propername)

    return mailmap
633
633
634
634
def mapname(mailmap, author: bytes) -> bytes:
    """Translate an author field through a parsed mailmap.

    The mailmap is consulted first with the commit's name and email
    together and then with the email alone.  Any part the matching entry
    leaves unset is taken from ``author``.  If ``author`` is not well
    formed, or the mailmap is empty, ``author`` is returned as is.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Nothing can be mapped for a malformed author or an empty mailmap
    if not (isauthorwellformed(author) and mailmap):
        return author

    # The (name, email) pair exactly as it was recorded in the commit
    committed = mailmapping(name=person(author), email=email(author))

    try:
        # Most specific key first: commit name and commit email together
        entry = mailmap[committed]
    except KeyError:
        # Fall back to an email-only key; a separate object is built so
        # the commit's own fields stay available below
        emailonly = mailmapping(email=committed.email)
        entry = mailmap.get(emailonly, mailmapping(None, None))

    # Fill in whatever the mailmap entry did not provide
    name = entry.name or committed.name
    address = entry.email or committed.email
    return b'%s <%s>' % (name, address)
681
681
682
682
# One or more characters other than "<", a single whitespace character,
# then "<...@...>" with no further angle brackets, up to the end of input.
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author: bytes) -> bool:
    """Tell whether ``author`` looks like "Contributor Name <contrib@email.dom>"

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    matched = _correctauthorformat.match(author)
    return matched is not None
706
706
707
707
def firstline(text: bytes) -> bytes:
    """Return the first line of ``text``, or ``b''`` when there is none"""
    # Cut at the first "\n" beforehand so that splitlines() never has to
    # walk the whole input
    newline = text.find(b'\n')
    head = text if newline == -1 else text[:newline]
    pieces = head.splitlines()
    return pieces[0] if pieces else b''
718
718
719
719
def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
    """Trim ``text`` to at most ``maxlength`` (default: 400) display columns.

    The work is delegated to ``encoding.trim`` with ``...`` as the ellipsis.
    """
    marker = b'...'
    return encoding.trim(text, maxlength, ellipsis=marker)
723
723
724
724
def escapestr(s: bytes) -> bytes:
    """Escape ``s`` with ``codecs.escape_encode`` and return the result.

    ``memoryview`` and ``bytearray`` arguments are copied into a ``bytes``
    object first, because the underlying codec function only accepts real
    ``bytes``.
    """
    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
    if isinstance(s, (memoryview, bytearray)):
        s = bytes(s)
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    # pytype: disable=bad-return-type
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
    # pytype: enable=bad-return-type
734
734
735
735
def unescapestr(s: bytes) -> bytes:
    """Decode the backslash escapes in ``s`` with ``codecs.escape_decode``"""
    # escape_decode() returns a (decoded, consumed length) pair; only the
    # decoded data is of interest here
    # pytype: disable=bad-return-type
    decoded, _consumed = codecs.escape_decode(s)  # pytype: disable=module-attr
    # pytype: enable=bad-return-type
    return decoded
740
740
741
741
def forcebytestr(obj):
    """Portably turn an arbitrary object (e.g. an exception) into a byte
    string.

    ``pycompat.bytestr`` is tried first; if it raises UnicodeEncodeError,
    ``str(obj)`` is converted with ``encoding.strtolocal`` instead.
    """
    try:
        formatted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        formatted = pycompat.bytestr(encoding.strtolocal(str(obj)))
    return formatted
750
750
751
751
def uirepr(s: bytes) -> bytes:
    """Return the byte repr of ``s`` with doubled backslashes collapsed"""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
755
755
756
756
# delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Return a ``textwrap.TextWrapper`` subclass instance that measures
    text with ``encoding.ucolwidth`` instead of ``len()``.

    The subclass is defined on the first call; the module-level name
    ``_MBTextWrapper`` is then rebound to the class itself, so later calls
    instantiate it directly.  ``kwargs`` are passed to the constructor.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` into a head that fits in ``space_left``
            columns and the remaining tail."""
            l = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                l += colwidth(char)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits.  The tail is put back on the chunk stack by
            # the caller, so it has to be text like ``ucstr``, not bytes.
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError(b"invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:
                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
863
863
864
864
def wrap(
    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
) -> bytes:
    """Wrap ``line`` to ``width`` columns and return the filled text.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones.  If ``width`` is not larger than the longer of the two
    indents, ``max(78, longest indent + 1)`` is used instead.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)

    # The wrapper works on text, so decode all byte inputs up front
    utext = line.decode(codec, errors)
    ufirst = initindent.decode(codec, errors)
    uhang = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=ufirst, subsequent_indent=uhang
    )
    filled = wrapper.fill(utext)
    return filled.encode(codec)
888
888
889
889
# Lower-case spellings accepted by parsebool() and the value each stands for
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}


def parsebool(s: bytes) -> Optional[bool]:
    """Interpret ``s`` as a boolean, ignoring case.

    Returns None when ``s`` is not one of the recognized spellings.
    """
    spelling = s.lower()
    return _booleans.get(spelling)
910
910
911
911
# TODO: make arg mandatory (and fix code below?)
def parselist(value: Optional[bytes]):
    """parse a configuration value as a list of comma/space separated strings

    A ``bytes`` value is split on whitespace and commas; double quotes group
    characters (including separators) into a single item and ``\\"`` stands
    for a literal quote.  Any other value is returned unchanged, except that
    a false value (such as None) becomes an empty list.

    >>> parselist(b'this,is "a small" ,test')
    ['this', 'is', 'a small', 'test']
    """

    # The parser is a small state machine.  Each state function takes
    # (parts, s, offset) and returns (next state or None, parts, offset);
    # None stops the loop in _configlist().

    def _parse_plain(parts, s, offset):
        """Skip separators, then consume one character outside quotes."""
        whitespace = False
        while offset < len(s) and (
            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
        ):
            whitespace = True
            offset += 1
        if offset >= len(s):
            return None, parts, offset
        if whitespace:
            # separators were skipped, so a new item starts here
            parts.append(b'')
        if s[offset : offset + 1] == b'"' and not parts[-1]:
            # a quote at the very start of an item opens a quoted section
            return _parse_quote, parts, offset + 1
        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
            # \" inside a plain item: replace the backslash by the quote
            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
            return _parse_plain, parts, offset + 1
        parts[-1] += s[offset : offset + 1]
        return _parse_plain, parts, offset + 1

    def _parse_quote(parts, s, offset):
        """Consume a quoted section; ``offset`` is just past its opening
        quote."""
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
            parts.append(b'')
            offset += 1
            while offset < len(s) and (
                s[offset : offset + 1].isspace()
                or s[offset : offset + 1] == b','
            ):
                offset += 1
            return _parse_plain, parts, offset

        # collect everything up to the closing quote, unescaping \"
        while offset < len(s) and s[offset : offset + 1] != b'"':
            if (
                s[offset : offset + 1] == b'\\'
                and offset + 1 < len(s)
                and s[offset + 1 : offset + 2] == b'"'
            ):
                offset += 1
                parts[-1] += b'"'
            else:
                parts[-1] += s[offset : offset + 1]
            offset += 1

        if offset >= len(s):
            # No closing quote: reparse the collected text as plain items
            # and keep the opening quote as a literal character.
            real_parts = _configlist(parts[-1])
            if not real_parts:
                parts[-1] = b'"'
            else:
                real_parts[0] = b'"' + real_parts[0]
                parts = parts[:-1]
                parts.extend(real_parts)
            return None, parts, offset

        # step over the closing quote and any separators following it
        offset += 1
        while offset < len(s) and s[offset : offset + 1] in (b' ', b','):
            offset += 1

        if offset < len(s):
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
                # a lone quote as the very last character is kept literally
                parts[-1] += b'"'
                offset += 1
            else:
                parts.append(b'')
        else:
            return None, parts, offset

        return _parse_plain, parts, offset

    def _configlist(s):
        """Run the state machine over ``s`` and return the parsed items."""
        s = s.rstrip(b' ,')
        if not s:
            return []
        parser, parts, offset = _parse_plain, [b''], 0
        while parser:
            parser, parts, offset = parser(parts, s, offset)
        return parts

    # isinstance() is already False for None, no separate check is needed
    if isinstance(value, bytes):
        result = _configlist(value.lstrip(b' ,\n'))
    else:
        result = value
    return result or []
1001
1001
1002
1002
def evalpythonliteral(s: bytes):
    """Evaluate bytes holding a Python literal expression and return its value

    ``s`` is decoded as latin1 before being handed to ``ast.literal_eval``.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1')
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now