typing: add basic type hints to stringutil.py
Matt Harbison
r50470:bbbb5213 default
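The core of this change is typing.overload: reescape() keeps a single runtime implementation while advertising bytes-in/bytes-out and str-in/str-out signatures to type checkers. A minimal, self-contained sketch of the same pattern follows; the double() function is illustrative only and is not part of the patch:

    from typing import overload


    @overload
    def double(value: bytes) -> bytes:
        ...


    @overload
    def double(value: str) -> str:
        ...


    def double(value):
        # Single runtime implementation; the @overload stubs above exist only
        # for type checkers (mypy/pytype) and are never executed.
        return value + value


    doubled_bytes = double(b'ab')  # a checker infers bytes here
    doubled_text = double('ab')    # a checker infers str here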
@@ -1,979 +1,998 @@
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10
10
11 import ast
11 import ast
12 import codecs
12 import codecs
13 import re as remod
13 import re as remod
14 import textwrap
14 import textwrap
15 import types
15 import types
16
16
17 from typing import (
18 Optional,
19 overload,
20 )
21
17 from ..i18n import _
22 from ..i18n import _
18 from ..thirdparty import attr
23 from ..thirdparty import attr
19
24
20 from .. import (
25 from .. import (
21 encoding,
26 encoding,
22 error,
27 error,
23 pycompat,
28 pycompat,
24 )
29 )
25
30
26 # regex special chars pulled from https://bugs.python.org/issue29995
31 # regex special chars pulled from https://bugs.python.org/issue29995
27 # which was part of Python 3.7.
32 # which was part of Python 3.7.
28 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
33 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
29 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
34 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
35 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
31
36
32
37
38 @overload
39 def reescape(pat: bytes) -> bytes:
40 ...
41
42
43 @overload
44 def reescape(pat: str) -> str:
45 ...
46
47
33 def reescape(pat):
48 def reescape(pat):
34 """Drop-in replacement for re.escape."""
49 """Drop-in replacement for re.escape."""
35 # NOTE: it is intentional that this works on unicodes and not
50 # NOTE: it is intentional that this works on unicodes and not
36 # bytes, as it's only possible to do the escaping with
51 # bytes, as it's only possible to do the escaping with
37 # unicode.translate, not bytes.translate. Sigh.
52 # unicode.translate, not bytes.translate. Sigh.
38 wantuni = True
53 wantuni = True
39 if isinstance(pat, bytes):
54 if isinstance(pat, bytes):
40 wantuni = False
55 wantuni = False
41 pat = pat.decode('latin1')
56 pat = pat.decode('latin1')
42 pat = pat.translate(_regexescapemap)
57 pat = pat.translate(_regexescapemap)
43 if wantuni:
58 if wantuni:
44 return pat
59 return pat
45 return pat.encode('latin1')
60 return pat.encode('latin1')
46
61
47
62
48 def pprint(o, bprefix=False, indent=0, level=0):
63 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
49 """Pretty print an object."""
64 """Pretty print an object."""
50 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
65 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
51
66
52
67
53 def pprintgen(o, bprefix=False, indent=0, level=0):
68 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
54 """Pretty print an object to a generator of atoms.
69 """Pretty print an object to a generator of atoms.
55
70
56 ``bprefix`` is a flag influencing whether bytestrings are preferred with
71 ``bprefix`` is a flag influencing whether bytestrings are preferred with
57 a ``b''`` prefix.
72 a ``b''`` prefix.
58
73
59 ``indent`` controls whether collections and nested data structures
74 ``indent`` controls whether collections and nested data structures
60 span multiple lines via the indentation amount in spaces. By default,
75 span multiple lines via the indentation amount in spaces. By default,
61 no newlines are emitted.
76 no newlines are emitted.
62
77
63 ``level`` specifies the initial indent level. Used if ``indent > 0``.
78 ``level`` specifies the initial indent level. Used if ``indent > 0``.
64 """
79 """
65
80
66 if isinstance(o, bytes):
81 if isinstance(o, bytes):
67 if bprefix:
82 if bprefix:
68 yield b"b'%s'" % escapestr(o)
83 yield b"b'%s'" % escapestr(o)
69 else:
84 else:
70 yield b"'%s'" % escapestr(o)
85 yield b"'%s'" % escapestr(o)
71 elif isinstance(o, bytearray):
86 elif isinstance(o, bytearray):
72 # codecs.escape_encode() can't handle bytearray, so escapestr fails
87 # codecs.escape_encode() can't handle bytearray, so escapestr fails
73 # without coercion.
88 # without coercion.
74 yield b"bytearray['%s']" % escapestr(bytes(o))
89 yield b"bytearray['%s']" % escapestr(bytes(o))
75 elif isinstance(o, list):
90 elif isinstance(o, list):
76 if not o:
91 if not o:
77 yield b'[]'
92 yield b'[]'
78 return
93 return
79
94
80 yield b'['
95 yield b'['
81
96
82 if indent:
97 if indent:
83 level += 1
98 level += 1
84 yield b'\n'
99 yield b'\n'
85 yield b' ' * (level * indent)
100 yield b' ' * (level * indent)
86
101
87 for i, a in enumerate(o):
102 for i, a in enumerate(o):
88 for chunk in pprintgen(
103 for chunk in pprintgen(
89 a, bprefix=bprefix, indent=indent, level=level
104 a, bprefix=bprefix, indent=indent, level=level
90 ):
105 ):
91 yield chunk
106 yield chunk
92
107
93 if i + 1 < len(o):
108 if i + 1 < len(o):
94 if indent:
109 if indent:
95 yield b',\n'
110 yield b',\n'
96 yield b' ' * (level * indent)
111 yield b' ' * (level * indent)
97 else:
112 else:
98 yield b', '
113 yield b', '
99
114
100 if indent:
115 if indent:
101 level -= 1
116 level -= 1
102 yield b'\n'
117 yield b'\n'
103 yield b' ' * (level * indent)
118 yield b' ' * (level * indent)
104
119
105 yield b']'
120 yield b']'
106 elif isinstance(o, dict):
121 elif isinstance(o, dict):
107 if not o:
122 if not o:
108 yield b'{}'
123 yield b'{}'
109 return
124 return
110
125
111 yield b'{'
126 yield b'{'
112
127
113 if indent:
128 if indent:
114 level += 1
129 level += 1
115 yield b'\n'
130 yield b'\n'
116 yield b' ' * (level * indent)
131 yield b' ' * (level * indent)
117
132
118 for i, (k, v) in enumerate(sorted(o.items())):
133 for i, (k, v) in enumerate(sorted(o.items())):
119 for chunk in pprintgen(
134 for chunk in pprintgen(
120 k, bprefix=bprefix, indent=indent, level=level
135 k, bprefix=bprefix, indent=indent, level=level
121 ):
136 ):
122 yield chunk
137 yield chunk
123
138
124 yield b': '
139 yield b': '
125
140
126 for chunk in pprintgen(
141 for chunk in pprintgen(
127 v, bprefix=bprefix, indent=indent, level=level
142 v, bprefix=bprefix, indent=indent, level=level
128 ):
143 ):
129 yield chunk
144 yield chunk
130
145
131 if i + 1 < len(o):
146 if i + 1 < len(o):
132 if indent:
147 if indent:
133 yield b',\n'
148 yield b',\n'
134 yield b' ' * (level * indent)
149 yield b' ' * (level * indent)
135 else:
150 else:
136 yield b', '
151 yield b', '
137
152
138 if indent:
153 if indent:
139 level -= 1
154 level -= 1
140 yield b'\n'
155 yield b'\n'
141 yield b' ' * (level * indent)
156 yield b' ' * (level * indent)
142
157
143 yield b'}'
158 yield b'}'
144 elif isinstance(o, set):
159 elif isinstance(o, set):
145 if not o:
160 if not o:
146 yield b'set([])'
161 yield b'set([])'
147 return
162 return
148
163
149 yield b'set(['
164 yield b'set(['
150
165
151 if indent:
166 if indent:
152 level += 1
167 level += 1
153 yield b'\n'
168 yield b'\n'
154 yield b' ' * (level * indent)
169 yield b' ' * (level * indent)
155
170
156 for i, k in enumerate(sorted(o)):
171 for i, k in enumerate(sorted(o)):
157 for chunk in pprintgen(
172 for chunk in pprintgen(
158 k, bprefix=bprefix, indent=indent, level=level
173 k, bprefix=bprefix, indent=indent, level=level
159 ):
174 ):
160 yield chunk
175 yield chunk
161
176
162 if i + 1 < len(o):
177 if i + 1 < len(o):
163 if indent:
178 if indent:
164 yield b',\n'
179 yield b',\n'
165 yield b' ' * (level * indent)
180 yield b' ' * (level * indent)
166 else:
181 else:
167 yield b', '
182 yield b', '
168
183
169 if indent:
184 if indent:
170 level -= 1
185 level -= 1
171 yield b'\n'
186 yield b'\n'
172 yield b' ' * (level * indent)
187 yield b' ' * (level * indent)
173
188
174 yield b'])'
189 yield b'])'
175 elif isinstance(o, tuple):
190 elif isinstance(o, tuple):
176 if not o:
191 if not o:
177 yield b'()'
192 yield b'()'
178 return
193 return
179
194
180 yield b'('
195 yield b'('
181
196
182 if indent:
197 if indent:
183 level += 1
198 level += 1
184 yield b'\n'
199 yield b'\n'
185 yield b' ' * (level * indent)
200 yield b' ' * (level * indent)
186
201
187 for i, a in enumerate(o):
202 for i, a in enumerate(o):
188 for chunk in pprintgen(
203 for chunk in pprintgen(
189 a, bprefix=bprefix, indent=indent, level=level
204 a, bprefix=bprefix, indent=indent, level=level
190 ):
205 ):
191 yield chunk
206 yield chunk
192
207
193 if i + 1 < len(o):
208 if i + 1 < len(o):
194 if indent:
209 if indent:
195 yield b',\n'
210 yield b',\n'
196 yield b' ' * (level * indent)
211 yield b' ' * (level * indent)
197 else:
212 else:
198 yield b', '
213 yield b', '
199
214
200 if indent:
215 if indent:
201 level -= 1
216 level -= 1
202 yield b'\n'
217 yield b'\n'
203 yield b' ' * (level * indent)
218 yield b' ' * (level * indent)
204
219
205 yield b')'
220 yield b')'
206 elif isinstance(o, types.GeneratorType):
221 elif isinstance(o, types.GeneratorType):
207 # Special case of empty generator.
222 # Special case of empty generator.
208 try:
223 try:
209 nextitem = next(o)
224 nextitem = next(o)
210 except StopIteration:
225 except StopIteration:
211 yield b'gen[]'
226 yield b'gen[]'
212 return
227 return
213
228
214 yield b'gen['
229 yield b'gen['
215
230
216 if indent:
231 if indent:
217 level += 1
232 level += 1
218 yield b'\n'
233 yield b'\n'
219 yield b' ' * (level * indent)
234 yield b' ' * (level * indent)
220
235
221 last = False
236 last = False
222
237
223 while not last:
238 while not last:
224 current = nextitem
239 current = nextitem
225
240
226 try:
241 try:
227 nextitem = next(o)
242 nextitem = next(o)
228 except StopIteration:
243 except StopIteration:
229 last = True
244 last = True
230
245
231 for chunk in pprintgen(
246 for chunk in pprintgen(
232 current, bprefix=bprefix, indent=indent, level=level
247 current, bprefix=bprefix, indent=indent, level=level
233 ):
248 ):
234 yield chunk
249 yield chunk
235
250
236 if not last:
251 if not last:
237 if indent:
252 if indent:
238 yield b',\n'
253 yield b',\n'
239 yield b' ' * (level * indent)
254 yield b' ' * (level * indent)
240 else:
255 else:
241 yield b', '
256 yield b', '
242
257
243 if indent:
258 if indent:
244 level -= 1
259 level -= 1
245 yield b'\n'
260 yield b'\n'
246 yield b' ' * (level * indent)
261 yield b' ' * (level * indent)
247
262
248 yield b']'
263 yield b']'
249 else:
264 else:
250 yield pycompat.byterepr(o)
265 yield pycompat.byterepr(o)
251
266
252
267
253 def prettyrepr(o):
268 def prettyrepr(o) -> bytes:
254 """Pretty print a representation of a possibly-nested object"""
269 """Pretty print a representation of a possibly-nested object"""
255 lines = []
270 lines = []
256 rs = pycompat.byterepr(o)
271 rs = pycompat.byterepr(o)
257 p0 = p1 = 0
272 p0 = p1 = 0
258 while p0 < len(rs):
273 while p0 < len(rs):
259 # '... field=<type ... field=<type ...'
274 # '... field=<type ... field=<type ...'
260 # ~~~~~~~~~~~~~~~~
275 # ~~~~~~~~~~~~~~~~
261 # p0 p1 q0 q1
276 # p0 p1 q0 q1
262 q0 = -1
277 q0 = -1
263 q1 = rs.find(b'<', p1 + 1)
278 q1 = rs.find(b'<', p1 + 1)
264 if q1 < 0:
279 if q1 < 0:
265 q1 = len(rs)
280 q1 = len(rs)
266 # pytype: disable=wrong-arg-count
281 # pytype: disable=wrong-arg-count
267 # TODO: figure out why pytype doesn't recognize the optional start
282 # TODO: figure out why pytype doesn't recognize the optional start
268 # arg
283 # arg
269 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
284 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
270 # pytype: enable=wrong-arg-count
285 # pytype: enable=wrong-arg-count
271 # backtrack for ' field=<'
286 # backtrack for ' field=<'
272 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
287 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
273 if q0 < 0:
288 if q0 < 0:
274 q0 = q1
289 q0 = q1
275 else:
290 else:
276 q0 += 1 # skip ' '
291 q0 += 1 # skip ' '
277 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
292 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
278 assert l >= 0
293 assert l >= 0
279 lines.append((l, rs[p0:q0].rstrip()))
294 lines.append((l, rs[p0:q0].rstrip()))
280 p0, p1 = q0, q1
295 p0, p1 = q0, q1
281 return b'\n'.join(b' ' * l + s for l, s in lines)
296 return b'\n'.join(b' ' * l + s for l, s in lines)
282
297
283
298
284 def buildrepr(r):
299 def buildrepr(r) -> bytes:
285 """Format an optional printable representation from unexpanded bits
300 """Format an optional printable representation from unexpanded bits
286
301
287 ======== =================================
302 ======== =================================
288 type(r) example
303 type(r) example
289 ======== =================================
304 ======== =================================
290 tuple ('<not %r>', other)
305 tuple ('<not %r>', other)
291 bytes '<branch closed>'
306 bytes '<branch closed>'
292 callable lambda: '<branch %r>' % sorted(b)
307 callable lambda: '<branch %r>' % sorted(b)
293 object other
308 object other
294 ======== =================================
309 ======== =================================
295 """
310 """
296 if r is None:
311 if r is None:
297 return b''
312 return b''
298 elif isinstance(r, tuple):
313 elif isinstance(r, tuple):
299 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
314 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
300 elif isinstance(r, bytes):
315 elif isinstance(r, bytes):
301 return r
316 return r
302 elif callable(r):
317 elif callable(r):
303 return r()
318 return r()
304 else:
319 else:
305 return pprint(r)
320 return pprint(r)
306
321
307
322
308 def binary(s):
323 def binary(s: bytes) -> bool:
309 """return true if a string is binary data"""
324 """return true if a string is binary data"""
310 return bool(s and b'\0' in s)
325 return bool(s and b'\0' in s)
311
326
312
327
313 def _splitpattern(pattern):
328 def _splitpattern(pattern: bytes):
314 if pattern.startswith(b're:'):
329 if pattern.startswith(b're:'):
315 return b're', pattern[3:]
330 return b're', pattern[3:]
316 elif pattern.startswith(b'literal:'):
331 elif pattern.startswith(b'literal:'):
317 return b'literal', pattern[8:]
332 return b'literal', pattern[8:]
318 return b'literal', pattern
333 return b'literal', pattern
319
334
320
335
321 def stringmatcher(pattern, casesensitive=True):
336 def stringmatcher(pattern: bytes, casesensitive: bool = True):
322 """
337 """
323 accepts a string, possibly starting with 're:' or 'literal:' prefix.
338 accepts a string, possibly starting with 're:' or 'literal:' prefix.
324 returns the matcher name, pattern, and matcher function.
339 returns the matcher name, pattern, and matcher function.
325 missing or unknown prefixes are treated as literal matches.
340 missing or unknown prefixes are treated as literal matches.
326
341
327 helper for tests:
342 helper for tests:
328 >>> def test(pattern, *tests):
343 >>> def test(pattern, *tests):
329 ... kind, pattern, matcher = stringmatcher(pattern)
344 ... kind, pattern, matcher = stringmatcher(pattern)
330 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
345 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
331 >>> def itest(pattern, *tests):
346 >>> def itest(pattern, *tests):
332 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
347 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
333 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
348 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
334
349
335 exact matching (no prefix):
350 exact matching (no prefix):
336 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
351 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
337 ('literal', 'abcdefg', [False, False, True])
352 ('literal', 'abcdefg', [False, False, True])
338
353
339 regex matching ('re:' prefix)
354 regex matching ('re:' prefix)
340 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
355 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
341 ('re', 'a.+b', [False, False, True])
356 ('re', 'a.+b', [False, False, True])
342
357
343 force exact matches ('literal:' prefix)
358 force exact matches ('literal:' prefix)
344 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
359 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
345 ('literal', 're:foobar', [False, True])
360 ('literal', 're:foobar', [False, True])
346
361
347 unknown prefixes are ignored and treated as literals
362 unknown prefixes are ignored and treated as literals
348 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
363 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
349 ('literal', 'foo:bar', [False, False, True])
364 ('literal', 'foo:bar', [False, False, True])
350
365
351 case insensitive regex matches
366 case insensitive regex matches
352 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
367 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
353 ('re', 'A.+b', [False, False, True])
368 ('re', 'A.+b', [False, False, True])
354
369
355 case insensitive literal matches
370 case insensitive literal matches
356 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
371 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
357 ('literal', 'ABCDEFG', [False, False, True])
372 ('literal', 'ABCDEFG', [False, False, True])
358 """
373 """
359 kind, pattern = _splitpattern(pattern)
374 kind, pattern = _splitpattern(pattern)
360 if kind == b're':
375 if kind == b're':
361 try:
376 try:
362 flags = 0
377 flags = 0
363 if not casesensitive:
378 if not casesensitive:
364 flags = remod.I
379 flags = remod.I
365 regex = remod.compile(pattern, flags)
380 regex = remod.compile(pattern, flags)
366 except remod.error as e:
381 except remod.error as e:
367 raise error.ParseError(
382 raise error.ParseError(
368 _(b'invalid regular expression: %s') % forcebytestr(e)
383 _(b'invalid regular expression: %s') % forcebytestr(e)
369 )
384 )
370 return kind, pattern, regex.search
385 return kind, pattern, regex.search
371 elif kind == b'literal':
386 elif kind == b'literal':
372 if casesensitive:
387 if casesensitive:
373 match = pattern.__eq__
388 match = pattern.__eq__
374 else:
389 else:
375 ipat = encoding.lower(pattern)
390 ipat = encoding.lower(pattern)
376 match = lambda s: ipat == encoding.lower(s)
391 match = lambda s: ipat == encoding.lower(s)
377 return kind, pattern, match
392 return kind, pattern, match
378
393
379 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
394 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
380
395
381
396
382 def substringregexp(pattern, flags=0):
397 def substringregexp(pattern: bytes, flags: int = 0):
383 """Build a regexp object from a string pattern possibly starting with
398 """Build a regexp object from a string pattern possibly starting with
384 're:' or 'literal:' prefix.
399 're:' or 'literal:' prefix.
385
400
386 helper for tests:
401 helper for tests:
387 >>> def test(pattern, *tests):
402 >>> def test(pattern, *tests):
388 ... regexp = substringregexp(pattern)
403 ... regexp = substringregexp(pattern)
389 ... return [bool(regexp.search(t)) for t in tests]
404 ... return [bool(regexp.search(t)) for t in tests]
390 >>> def itest(pattern, *tests):
405 >>> def itest(pattern, *tests):
391 ... regexp = substringregexp(pattern, remod.I)
406 ... regexp = substringregexp(pattern, remod.I)
392 ... return [bool(regexp.search(t)) for t in tests]
407 ... return [bool(regexp.search(t)) for t in tests]
393
408
394 substring matching (no prefix):
409 substring matching (no prefix):
395 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
410 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
396 [False, False, True]
411 [False, False, True]
397
412
398 substring pattern should be escaped:
413 substring pattern should be escaped:
399 >>> substringregexp(b'.bc').pattern
414 >>> substringregexp(b'.bc').pattern
400 '\\\\.bc'
415 '\\\\.bc'
401 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
416 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
402 [False, False, False]
417 [False, False, False]
403
418
404 regex matching ('re:' prefix)
419 regex matching ('re:' prefix)
405 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
420 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
406 [False, False, True]
421 [False, False, True]
407
422
408 force substring matches ('literal:' prefix)
423 force substring matches ('literal:' prefix)
409 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
424 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
410 [False, True]
425 [False, True]
411
426
412 case insensitive literal matches
427 case insensitive literal matches
413 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
428 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
414 [False, False, True]
429 [False, False, True]
415
430
416 case insensitive regex matches
431 case insensitive regex matches
417 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
432 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
418 [False, False, True]
433 [False, False, True]
419 """
434 """
420 kind, pattern = _splitpattern(pattern)
435 kind, pattern = _splitpattern(pattern)
421 if kind == b're':
436 if kind == b're':
422 try:
437 try:
423 return remod.compile(pattern, flags)
438 return remod.compile(pattern, flags)
424 except remod.error as e:
439 except remod.error as e:
425 raise error.ParseError(
440 raise error.ParseError(
426 _(b'invalid regular expression: %s') % forcebytestr(e)
441 _(b'invalid regular expression: %s') % forcebytestr(e)
427 )
442 )
428 elif kind == b'literal':
443 elif kind == b'literal':
429 return remod.compile(remod.escape(pattern), flags)
444 return remod.compile(remod.escape(pattern), flags)
430
445
431 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
446 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
432
447
433
448
434 def shortuser(user):
449 def shortuser(user: bytes) -> bytes:
435 """Return a short representation of a user name or email address."""
450 """Return a short representation of a user name or email address."""
436 f = user.find(b'@')
451 f = user.find(b'@')
437 if f >= 0:
452 if f >= 0:
438 user = user[:f]
453 user = user[:f]
439 f = user.find(b'<')
454 f = user.find(b'<')
440 if f >= 0:
455 if f >= 0:
441 user = user[f + 1 :]
456 user = user[f + 1 :]
442 f = user.find(b' ')
457 f = user.find(b' ')
443 if f >= 0:
458 if f >= 0:
444 user = user[:f]
459 user = user[:f]
445 f = user.find(b'.')
460 f = user.find(b'.')
446 if f >= 0:
461 if f >= 0:
447 user = user[:f]
462 user = user[:f]
448 return user
463 return user
449
464
450
465
451 def emailuser(user):
466 def emailuser(user: bytes) -> bytes:
452 """Return the user portion of an email address."""
467 """Return the user portion of an email address."""
453 f = user.find(b'@')
468 f = user.find(b'@')
454 if f >= 0:
469 if f >= 0:
455 user = user[:f]
470 user = user[:f]
456 f = user.find(b'<')
471 f = user.find(b'<')
457 if f >= 0:
472 if f >= 0:
458 user = user[f + 1 :]
473 user = user[f + 1 :]
459 return user
474 return user
460
475
461
476
462 def email(author):
477 def email(author: bytes) -> bytes:
463 '''get email of author.'''
478 '''get email of author.'''
464 r = author.find(b'>')
479 r = author.find(b'>')
465 if r == -1:
480 if r == -1:
466 r = None
481 r = None
467 return author[author.find(b'<') + 1 : r]
482 return author[author.find(b'<') + 1 : r]
468
483
469
484
470 def person(author):
485 def person(author: bytes) -> bytes:
471 """Returns the name before an email address,
486 """Returns the name before an email address,
472 interpreting it as per RFC 5322
487 interpreting it as per RFC 5322
473
488
474 >>> person(b'foo@bar')
489 >>> person(b'foo@bar')
475 'foo'
490 'foo'
476 >>> person(b'Foo Bar <foo@bar>')
491 >>> person(b'Foo Bar <foo@bar>')
477 'Foo Bar'
492 'Foo Bar'
478 >>> person(b'"Foo Bar" <foo@bar>')
493 >>> person(b'"Foo Bar" <foo@bar>')
479 'Foo Bar'
494 'Foo Bar'
480 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
495 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
481 'Foo "buz" Bar'
496 'Foo "buz" Bar'
482 >>> # The following are invalid, but do exist in real-life
497 >>> # The following are invalid, but do exist in real-life
483 ...
498 ...
484 >>> person(b'Foo "buz" Bar <foo@bar>')
499 >>> person(b'Foo "buz" Bar <foo@bar>')
485 'Foo "buz" Bar'
500 'Foo "buz" Bar'
486 >>> person(b'"Foo Bar <foo@bar>')
501 >>> person(b'"Foo Bar <foo@bar>')
487 'Foo Bar'
502 'Foo Bar'
488 """
503 """
489 if b'@' not in author:
504 if b'@' not in author:
490 return author
505 return author
491 f = author.find(b'<')
506 f = author.find(b'<')
492 if f != -1:
507 if f != -1:
493 return author[:f].strip(b' "').replace(b'\\"', b'"')
508 return author[:f].strip(b' "').replace(b'\\"', b'"')
494 f = author.find(b'@')
509 f = author.find(b'@')
495 return author[:f].replace(b'.', b' ')
510 return author[:f].replace(b'.', b' ')
496
511
497
512
498 @attr.s(hash=True)
513 @attr.s(hash=True)
499 class mailmapping:
514 class mailmapping:
500 """Represents a username/email key or value in
515 """Represents a username/email key or value in
501 a mailmap file"""
516 a mailmap file"""
502
517
503 email = attr.ib()
518 email = attr.ib()
504 name = attr.ib(default=None)
519 name = attr.ib(default=None)
505
520
506
521
507 def _ismailmaplineinvalid(names, emails):
522 def _ismailmaplineinvalid(names, emails):
508 """Returns True if the parsed names and emails
523 """Returns True if the parsed names and emails
509 in a mailmap entry are invalid.
524 in a mailmap entry are invalid.
510
525
511 >>> # No names or emails fails
526 >>> # No names or emails fails
512 >>> names, emails = [], []
527 >>> names, emails = [], []
513 >>> _ismailmaplineinvalid(names, emails)
528 >>> _ismailmaplineinvalid(names, emails)
514 True
529 True
515 >>> # Only one email fails
530 >>> # Only one email fails
516 >>> emails = [b'email@email.com']
531 >>> emails = [b'email@email.com']
517 >>> _ismailmaplineinvalid(names, emails)
532 >>> _ismailmaplineinvalid(names, emails)
518 True
533 True
519 >>> # One email and one name passes
534 >>> # One email and one name passes
520 >>> names = [b'Test Name']
535 >>> names = [b'Test Name']
521 >>> _ismailmaplineinvalid(names, emails)
536 >>> _ismailmaplineinvalid(names, emails)
522 False
537 False
523 >>> # No names but two emails passes
538 >>> # No names but two emails passes
524 >>> names = []
539 >>> names = []
525 >>> emails = [b'proper@email.com', b'commit@email.com']
540 >>> emails = [b'proper@email.com', b'commit@email.com']
526 >>> _ismailmaplineinvalid(names, emails)
541 >>> _ismailmaplineinvalid(names, emails)
527 False
542 False
528 """
543 """
529 return not emails or not names and len(emails) < 2
544 return not emails or not names and len(emails) < 2
530
545
531
546
532 def parsemailmap(mailmapcontent):
547 def parsemailmap(mailmapcontent):
533 """Parses data in the .mailmap format
548 """Parses data in the .mailmap format
534
549
535 >>> mmdata = b"\\n".join([
550 >>> mmdata = b"\\n".join([
536 ... b'# Comment',
551 ... b'# Comment',
537 ... b'Name <commit1@email.xx>',
552 ... b'Name <commit1@email.xx>',
538 ... b'<name@email.xx> <commit2@email.xx>',
553 ... b'<name@email.xx> <commit2@email.xx>',
539 ... b'Name <proper@email.xx> <commit3@email.xx>',
554 ... b'Name <proper@email.xx> <commit3@email.xx>',
540 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
555 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
541 ... ])
556 ... ])
542 >>> mm = parsemailmap(mmdata)
557 >>> mm = parsemailmap(mmdata)
543 >>> for key in sorted(mm.keys()):
558 >>> for key in sorted(mm.keys()):
544 ... print(key)
559 ... print(key)
545 mailmapping(email='commit1@email.xx', name=None)
560 mailmapping(email='commit1@email.xx', name=None)
546 mailmapping(email='commit2@email.xx', name=None)
561 mailmapping(email='commit2@email.xx', name=None)
547 mailmapping(email='commit3@email.xx', name=None)
562 mailmapping(email='commit3@email.xx', name=None)
548 mailmapping(email='commit4@email.xx', name='Commit')
563 mailmapping(email='commit4@email.xx', name='Commit')
549 >>> for val in sorted(mm.values()):
564 >>> for val in sorted(mm.values()):
550 ... print(val)
565 ... print(val)
551 mailmapping(email='commit1@email.xx', name='Name')
566 mailmapping(email='commit1@email.xx', name='Name')
552 mailmapping(email='name@email.xx', name=None)
567 mailmapping(email='name@email.xx', name=None)
553 mailmapping(email='proper@email.xx', name='Name')
568 mailmapping(email='proper@email.xx', name='Name')
554 mailmapping(email='proper@email.xx', name='Name')
569 mailmapping(email='proper@email.xx', name='Name')
555 """
570 """
556 mailmap = {}
571 mailmap = {}
557
572
558 if mailmapcontent is None:
573 if mailmapcontent is None:
559 return mailmap
574 return mailmap
560
575
561 for line in mailmapcontent.splitlines():
576 for line in mailmapcontent.splitlines():
562
577
563 # Don't bother checking the line if it is a comment or
578 # Don't bother checking the line if it is a comment or
564 # is an improperly formed author field
579 # is an improperly formed author field
565 if line.lstrip().startswith(b'#'):
580 if line.lstrip().startswith(b'#'):
566 continue
581 continue
567
582
568 # names, emails hold the parsed emails and names for each line
583 # names, emails hold the parsed emails and names for each line
569 # name_builder holds the words in a persons name
584 # name_builder holds the words in a persons name
570 names, emails = [], []
585 names, emails = [], []
571 namebuilder = []
586 namebuilder = []
572
587
573 for element in line.split():
588 for element in line.split():
574 if element.startswith(b'#'):
589 if element.startswith(b'#'):
575 # If we reach a comment in the mailmap file, move on
590 # If we reach a comment in the mailmap file, move on
576 break
591 break
577
592
578 elif element.startswith(b'<') and element.endswith(b'>'):
593 elif element.startswith(b'<') and element.endswith(b'>'):
579 # We have found an email.
594 # We have found an email.
580 # Parse it, and finalize any names from earlier
595 # Parse it, and finalize any names from earlier
581 emails.append(element[1:-1]) # Slice off the "<>"
596 emails.append(element[1:-1]) # Slice off the "<>"
582
597
583 if namebuilder:
598 if namebuilder:
584 names.append(b' '.join(namebuilder))
599 names.append(b' '.join(namebuilder))
585 namebuilder = []
600 namebuilder = []
586
601
587 # Break if we have found a second email, any other
602 # Break if we have found a second email, any other
588 # data does not fit the spec for .mailmap
603 # data does not fit the spec for .mailmap
589 if len(emails) > 1:
604 if len(emails) > 1:
590 break
605 break
591
606
592 else:
607 else:
593 # We have found another word in the committers name
608 # We have found another word in the committers name
594 namebuilder.append(element)
609 namebuilder.append(element)
595
610
596 # Check to see if we have parsed the line into a valid form
611 # Check to see if we have parsed the line into a valid form
597 # We require at least one email, and either at least one
612 # We require at least one email, and either at least one
598 # name or a second email
613 # name or a second email
599 if _ismailmaplineinvalid(names, emails):
614 if _ismailmaplineinvalid(names, emails):
600 continue
615 continue
601
616
602 mailmapkey = mailmapping(
617 mailmapkey = mailmapping(
603 email=emails[-1],
618 email=emails[-1],
604 name=names[-1] if len(names) == 2 else None,
619 name=names[-1] if len(names) == 2 else None,
605 )
620 )
606
621
607 mailmap[mailmapkey] = mailmapping(
622 mailmap[mailmapkey] = mailmapping(
608 email=emails[0],
623 email=emails[0],
609 name=names[0] if names else None,
624 name=names[0] if names else None,
610 )
625 )
611
626
612 return mailmap
627 return mailmap
613
628
614
629
615 def mapname(mailmap, author):
630 def mapname(mailmap, author: bytes) -> bytes:
616 """Returns the author field according to the mailmap cache, or
631 """Returns the author field according to the mailmap cache, or
617 the original author field.
632 the original author field.
618
633
619 >>> mmdata = b"\\n".join([
634 >>> mmdata = b"\\n".join([
620 ... b'# Comment',
635 ... b'# Comment',
621 ... b'Name <commit1@email.xx>',
636 ... b'Name <commit1@email.xx>',
622 ... b'<name@email.xx> <commit2@email.xx>',
637 ... b'<name@email.xx> <commit2@email.xx>',
623 ... b'Name <proper@email.xx> <commit3@email.xx>',
638 ... b'Name <proper@email.xx> <commit3@email.xx>',
624 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
639 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
625 ... ])
640 ... ])
626 >>> m = parsemailmap(mmdata)
641 >>> m = parsemailmap(mmdata)
627 >>> mapname(m, b'Commit <commit1@email.xx>')
642 >>> mapname(m, b'Commit <commit1@email.xx>')
628 'Name <commit1@email.xx>'
643 'Name <commit1@email.xx>'
629 >>> mapname(m, b'Name <commit2@email.xx>')
644 >>> mapname(m, b'Name <commit2@email.xx>')
630 'Name <name@email.xx>'
645 'Name <name@email.xx>'
631 >>> mapname(m, b'Commit <commit3@email.xx>')
646 >>> mapname(m, b'Commit <commit3@email.xx>')
632 'Name <proper@email.xx>'
647 'Name <proper@email.xx>'
633 >>> mapname(m, b'Commit <commit4@email.xx>')
648 >>> mapname(m, b'Commit <commit4@email.xx>')
634 'Name <proper@email.xx>'
649 'Name <proper@email.xx>'
635 >>> mapname(m, b'Unknown Name <unknown@email.com>')
650 >>> mapname(m, b'Unknown Name <unknown@email.com>')
636 'Unknown Name <unknown@email.com>'
651 'Unknown Name <unknown@email.com>'
637 """
652 """
638 # If the author field coming in isn't in the correct format,
653 # If the author field coming in isn't in the correct format,
639 # or the mailmap is empty just return the original author field
654 # or the mailmap is empty just return the original author field
640 if not isauthorwellformed(author) or not mailmap:
655 if not isauthorwellformed(author) or not mailmap:
641 return author
656 return author
642
657
643 # Turn the user name into a mailmapping
658 # Turn the user name into a mailmapping
644 commit = mailmapping(name=person(author), email=email(author))
659 commit = mailmapping(name=person(author), email=email(author))
645
660
646 try:
661 try:
647 # Try and use both the commit email and name as the key
662 # Try and use both the commit email and name as the key
648 proper = mailmap[commit]
663 proper = mailmap[commit]
649
664
650 except KeyError:
665 except KeyError:
651 # If the lookup fails, use just the email as the key instead
666 # If the lookup fails, use just the email as the key instead
652 # We call this commit2 as not to erase original commit fields
667 # We call this commit2 as not to erase original commit fields
653 commit2 = mailmapping(email=commit.email)
668 commit2 = mailmapping(email=commit.email)
654 proper = mailmap.get(commit2, mailmapping(None, None))
669 proper = mailmap.get(commit2, mailmapping(None, None))
655
670
656 # Return the author field with proper values filled in
671 # Return the author field with proper values filled in
657 return b'%s <%s>' % (
672 return b'%s <%s>' % (
658 proper.name if proper.name else commit.name,
673 proper.name if proper.name else commit.name,
659 proper.email if proper.email else commit.email,
674 proper.email if proper.email else commit.email,
660 )
675 )
661
676
662
677
663 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
678 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
664
679
665
680
666 def isauthorwellformed(author):
681 def isauthorwellformed(author: bytes) -> bool:
667 """Return True if the author field is well formed
682 """Return True if the author field is well formed
668 (ie "Contributor Name <contrib@email.dom>")
683 (ie "Contributor Name <contrib@email.dom>")
669
684
670 >>> isauthorwellformed(b'Good Author <good@author.com>')
685 >>> isauthorwellformed(b'Good Author <good@author.com>')
671 True
686 True
672 >>> isauthorwellformed(b'Author <good@author.com>')
687 >>> isauthorwellformed(b'Author <good@author.com>')
673 True
688 True
674 >>> isauthorwellformed(b'Bad Author')
689 >>> isauthorwellformed(b'Bad Author')
675 False
690 False
676 >>> isauthorwellformed(b'Bad Author <author@author.com')
691 >>> isauthorwellformed(b'Bad Author <author@author.com')
677 False
692 False
678 >>> isauthorwellformed(b'Bad Author author@author.com')
693 >>> isauthorwellformed(b'Bad Author author@author.com')
679 False
694 False
680 >>> isauthorwellformed(b'<author@author.com>')
695 >>> isauthorwellformed(b'<author@author.com>')
681 False
696 False
682 >>> isauthorwellformed(b'Bad Author <author>')
697 >>> isauthorwellformed(b'Bad Author <author>')
683 False
698 False
684 """
699 """
685 return _correctauthorformat.match(author) is not None
700 return _correctauthorformat.match(author) is not None
686
701
687
702
688 def firstline(text):
703 def firstline(text: bytes) -> bytes:
689 """Return the first line of the input"""
704 """Return the first line of the input"""
690 # Try to avoid running splitlines() on the whole string
705 # Try to avoid running splitlines() on the whole string
691 i = text.find(b'\n')
706 i = text.find(b'\n')
692 if i != -1:
707 if i != -1:
693 text = text[:i]
708 text = text[:i]
694 try:
709 try:
695 return text.splitlines()[0]
710 return text.splitlines()[0]
696 except IndexError:
711 except IndexError:
697 return b''
712 return b''
698
713
699
714
700 def ellipsis(text, maxlength=400):
715 def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
701 """Trim string to at most maxlength (default: 400) columns in display."""
716 """Trim string to at most maxlength (default: 400) columns in display."""
702 return encoding.trim(text, maxlength, ellipsis=b'...')
717 return encoding.trim(text, maxlength, ellipsis=b'...')
703
718
704
719
705 def escapestr(s):
720 def escapestr(s: bytes) -> bytes:
721 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
706 if isinstance(s, memoryview):
722 if isinstance(s, memoryview):
707 s = bytes(s)
723 s = bytes(s)
708 # call underlying function of s.encode('string_escape') directly for
724 # call underlying function of s.encode('string_escape') directly for
709 # Python 3 compatibility
725 # Python 3 compatibility
710 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
726 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
711
727
712
728
713 def unescapestr(s):
729 def unescapestr(s: bytes) -> bytes:
714 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
730 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
715
731
716
732
717 def forcebytestr(obj):
733 def forcebytestr(obj):
718 """Portably format an arbitrary object (e.g. exception) into a byte
734 """Portably format an arbitrary object (e.g. exception) into a byte
719 string."""
735 string."""
720 try:
736 try:
721 return pycompat.bytestr(obj)
737 return pycompat.bytestr(obj)
722 except UnicodeEncodeError:
738 except UnicodeEncodeError:
723 # non-ascii string, may be lossy
739 # non-ascii string, may be lossy
724 return pycompat.bytestr(encoding.strtolocal(str(obj)))
740 return pycompat.bytestr(encoding.strtolocal(str(obj)))
725
741
726
742
727 def uirepr(s):
743 def uirepr(s: bytes) -> bytes:
728 # Avoid double backslash in Windows path repr()
744 # Avoid double backslash in Windows path repr()
729 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
745 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
730
746
731
747
732 # delay import of textwrap
748 # delay import of textwrap
733 def _MBTextWrapper(**kwargs):
749 def _MBTextWrapper(**kwargs):
734 class tw(textwrap.TextWrapper):
750 class tw(textwrap.TextWrapper):
735 """
751 """
736 Extend TextWrapper for width-awareness.
752 Extend TextWrapper for width-awareness.
737
753
738 Neither number of 'bytes' in any encoding nor 'characters' is
754 Neither number of 'bytes' in any encoding nor 'characters' is
739 appropriate to calculate terminal columns for specified string.
755 appropriate to calculate terminal columns for specified string.
740
756
741 Original TextWrapper implementation uses built-in 'len()' directly,
757 Original TextWrapper implementation uses built-in 'len()' directly,
742 so overriding is needed to use width information of each characters.
758 so overriding is needed to use width information of each characters.
743
759
744 In addition, characters classified into 'ambiguous' width are
760 In addition, characters classified into 'ambiguous' width are
745 treated as wide in East Asian area, but as narrow in other.
761 treated as wide in East Asian area, but as narrow in other.
746
762
747 This requires use decision to determine width of such characters.
763 This requires use decision to determine width of such characters.
748 """
764 """
749
765
750 def _cutdown(self, ucstr, space_left):
766 def _cutdown(self, ucstr, space_left):
751 l = 0
767 l = 0
752 colwidth = encoding.ucolwidth
768 colwidth = encoding.ucolwidth
753 for i in range(len(ucstr)):
769 for i in range(len(ucstr)):
754 l += colwidth(ucstr[i])
770 l += colwidth(ucstr[i])
755 if space_left < l:
771 if space_left < l:
756 return (ucstr[:i], ucstr[i:])
772 return (ucstr[:i], ucstr[i:])
757 return ucstr, b''
773 return ucstr, b''
758
774
759 # overriding of base class
775 # overriding of base class
760 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
776 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
761 space_left = max(width - cur_len, 1)
777 space_left = max(width - cur_len, 1)
762
778
763 if self.break_long_words:
779 if self.break_long_words:
764 cut, res = self._cutdown(reversed_chunks[-1], space_left)
780 cut, res = self._cutdown(reversed_chunks[-1], space_left)
765 cur_line.append(cut)
781 cur_line.append(cut)
766 reversed_chunks[-1] = res
782 reversed_chunks[-1] = res
767 elif not cur_line:
783 elif not cur_line:
768 cur_line.append(reversed_chunks.pop())
784 cur_line.append(reversed_chunks.pop())
769
785
770 # this overriding code is imported from TextWrapper of Python 2.6
786 # this overriding code is imported from TextWrapper of Python 2.6
771 # to calculate columns of string by 'encoding.ucolwidth()'
787 # to calculate columns of string by 'encoding.ucolwidth()'
772 def _wrap_chunks(self, chunks):
788 def _wrap_chunks(self, chunks):
773 colwidth = encoding.ucolwidth
789 colwidth = encoding.ucolwidth
774
790
775 lines = []
791 lines = []
776 if self.width <= 0:
792 if self.width <= 0:
777 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
793 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
778
794
779 # Arrange in reverse order so items can be efficiently popped
795 # Arrange in reverse order so items can be efficiently popped
780 # from a stack of chucks.
796 # from a stack of chucks.
781 chunks.reverse()
797 chunks.reverse()
782
798
783 while chunks:
799 while chunks:
784
800
785 # Start the list of chunks that will make up the current line.
801 # Start the list of chunks that will make up the current line.
786 # cur_len is just the length of all the chunks in cur_line.
802 # cur_len is just the length of all the chunks in cur_line.
787 cur_line = []
803 cur_line = []
788 cur_len = 0
804 cur_len = 0
789
805
790 # Figure out which static string will prefix this line.
806 # Figure out which static string will prefix this line.
791 if lines:
807 if lines:
792 indent = self.subsequent_indent
808 indent = self.subsequent_indent
793 else:
809 else:
794 indent = self.initial_indent
810 indent = self.initial_indent
795
811
796 # Maximum width for this line.
812 # Maximum width for this line.
797 width = self.width - len(indent)
813 width = self.width - len(indent)
798
814
799 # First chunk on line is whitespace -- drop it, unless this
815 # First chunk on line is whitespace -- drop it, unless this
800 # is the very beginning of the text (i.e. no lines started yet).
816 # is the very beginning of the text (i.e. no lines started yet).
801 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
817 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
802 del chunks[-1]
818 del chunks[-1]
803
819
804 while chunks:
820 while chunks:
805 l = colwidth(chunks[-1])
821 l = colwidth(chunks[-1])
806
822
807 # Can at least squeeze this chunk onto the current line.
823 # Can at least squeeze this chunk onto the current line.
808 if cur_len + l <= width:
824 if cur_len + l <= width:
809 cur_line.append(chunks.pop())
825 cur_line.append(chunks.pop())
810 cur_len += l
826 cur_len += l
811
827
812 # Nope, this line is full.
828 # Nope, this line is full.
813 else:
829 else:
814 break
830 break
815
831
816 # The current line is full, and the next chunk is too big to
832 # The current line is full, and the next chunk is too big to
817 # fit on *any* line (not just this one).
833 # fit on *any* line (not just this one).
818 if chunks and colwidth(chunks[-1]) > width:
834 if chunks and colwidth(chunks[-1]) > width:
819 self._handle_long_word(chunks, cur_line, cur_len, width)
835 self._handle_long_word(chunks, cur_line, cur_len, width)
820
836
821 # If the last chunk on this line is all whitespace, drop it.
837 # If the last chunk on this line is all whitespace, drop it.
822 if (
838 if (
823 self.drop_whitespace
839 self.drop_whitespace
824 and cur_line
840 and cur_line
825 and cur_line[-1].strip() == r''
841 and cur_line[-1].strip() == r''
826 ):
842 ):
827 del cur_line[-1]
843 del cur_line[-1]
828
844
829 # Convert current line back to a string and store it in list
845 # Convert current line back to a string and store it in list
830 # of all lines (return value).
846 # of all lines (return value).
831 if cur_line:
847 if cur_line:
832 lines.append(indent + ''.join(cur_line))
848 lines.append(indent + ''.join(cur_line))
833
849
834 return lines
850 return lines
835
851
836 global _MBTextWrapper
852 global _MBTextWrapper
837 _MBTextWrapper = tw
853 _MBTextWrapper = tw
838 return tw(**kwargs)
854 return tw(**kwargs)
839
855
840
856
841 def wrap(line, width, initindent=b'', hangindent=b''):
857 def wrap(
858 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
859 ) -> bytes:
842 maxindent = max(len(hangindent), len(initindent))
860 maxindent = max(len(hangindent), len(initindent))
843 if width <= maxindent:
861 if width <= maxindent:
844 # adjust for weird terminal size
862 # adjust for weird terminal size
845 width = max(78, maxindent + 1)
863 width = max(78, maxindent + 1)
846 line = line.decode(
864 line = line.decode(
847 pycompat.sysstr(encoding.encoding),
865 pycompat.sysstr(encoding.encoding),
848 pycompat.sysstr(encoding.encodingmode),
866 pycompat.sysstr(encoding.encodingmode),
849 )
867 )
850 initindent = initindent.decode(
868 initindent = initindent.decode(
851 pycompat.sysstr(encoding.encoding),
869 pycompat.sysstr(encoding.encoding),
852 pycompat.sysstr(encoding.encodingmode),
870 pycompat.sysstr(encoding.encodingmode),
853 )
871 )
854 hangindent = hangindent.decode(
872 hangindent = hangindent.decode(
855 pycompat.sysstr(encoding.encoding),
873 pycompat.sysstr(encoding.encoding),
856 pycompat.sysstr(encoding.encodingmode),
874 pycompat.sysstr(encoding.encodingmode),
857 )
875 )
858 wrapper = _MBTextWrapper(
876 wrapper = _MBTextWrapper(
859 width=width, initial_indent=initindent, subsequent_indent=hangindent
877 width=width, initial_indent=initindent, subsequent_indent=hangindent
860 )
878 )
861 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
879 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
862
880
863
881
864 _booleans = {
882 _booleans = {
865 b'1': True,
883 b'1': True,
866 b'yes': True,
884 b'yes': True,
867 b'true': True,
885 b'true': True,
868 b'on': True,
886 b'on': True,
869 b'always': True,
887 b'always': True,
870 b'0': False,
888 b'0': False,
871 b'no': False,
889 b'no': False,
872 b'false': False,
890 b'false': False,
873 b'off': False,
891 b'off': False,
874 b'never': False,
892 b'never': False,
875 }
893 }
876
894
877
895
878 def parsebool(s):
896 def parsebool(s: bytes) -> Optional[bool]:
879 """Parse s into a boolean.
897 """Parse s into a boolean.
880
898
881 If s is not a valid boolean, returns None.
899 If s is not a valid boolean, returns None.
882 """
900 """
883 return _booleans.get(s.lower(), None)
901 return _booleans.get(s.lower(), None)
884
902
885
903
886 def parselist(value):
904 # TODO: make arg mandatory (and fix code below?)
905 def parselist(value: Optional[bytes]):
887 """parse a configuration value as a list of comma/space separated strings
906 """parse a configuration value as a list of comma/space separated strings
888
907
889 >>> parselist(b'this,is "a small" ,test')
908 >>> parselist(b'this,is "a small" ,test')
890 ['this', 'is', 'a small', 'test']
909 ['this', 'is', 'a small', 'test']
891 """
910 """
892
911
893 def _parse_plain(parts, s, offset):
912 def _parse_plain(parts, s, offset):
894 whitespace = False
913 whitespace = False
895 while offset < len(s) and (
914 while offset < len(s) and (
896 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
915 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
897 ):
916 ):
898 whitespace = True
917 whitespace = True
899 offset += 1
918 offset += 1
900 if offset >= len(s):
919 if offset >= len(s):
901 return None, parts, offset
920 return None, parts, offset
902 if whitespace:
921 if whitespace:
903 parts.append(b'')
922 parts.append(b'')
904 if s[offset : offset + 1] == b'"' and not parts[-1]:
923 if s[offset : offset + 1] == b'"' and not parts[-1]:
905 return _parse_quote, parts, offset + 1
924 return _parse_quote, parts, offset + 1
906 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
925 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
907 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
926 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
908 return _parse_plain, parts, offset + 1
927 return _parse_plain, parts, offset + 1
909 parts[-1] += s[offset : offset + 1]
928 parts[-1] += s[offset : offset + 1]
910 return _parse_plain, parts, offset + 1
929 return _parse_plain, parts, offset + 1
911
930
912 def _parse_quote(parts, s, offset):
931 def _parse_quote(parts, s, offset):
913 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
932 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
914 parts.append(b'')
933 parts.append(b'')
915 offset += 1
934 offset += 1
916 while offset < len(s) and (
935 while offset < len(s) and (
917 s[offset : offset + 1].isspace()
936 s[offset : offset + 1].isspace()
918 or s[offset : offset + 1] == b','
937 or s[offset : offset + 1] == b','
919 ):
938 ):
920 offset += 1
939 offset += 1
921 return _parse_plain, parts, offset
940 return _parse_plain, parts, offset
922
941
923 while offset < len(s) and s[offset : offset + 1] != b'"':
942 while offset < len(s) and s[offset : offset + 1] != b'"':
924 if (
943 if (
925 s[offset : offset + 1] == b'\\'
944 s[offset : offset + 1] == b'\\'
926 and offset + 1 < len(s)
945 and offset + 1 < len(s)
927 and s[offset + 1 : offset + 2] == b'"'
946 and s[offset + 1 : offset + 2] == b'"'
928 ):
947 ):
929 offset += 1
948 offset += 1
930 parts[-1] += b'"'
949 parts[-1] += b'"'
931 else:
950 else:
932 parts[-1] += s[offset : offset + 1]
951 parts[-1] += s[offset : offset + 1]
933 offset += 1
952 offset += 1
934
953
935 if offset >= len(s):
954 if offset >= len(s):
936 real_parts = _configlist(parts[-1])
955 real_parts = _configlist(parts[-1])
937 if not real_parts:
956 if not real_parts:
938 parts[-1] = b'"'
957 parts[-1] = b'"'
939 else:
958 else:
940 real_parts[0] = b'"' + real_parts[0]
959 real_parts[0] = b'"' + real_parts[0]
941 parts = parts[:-1]
960 parts = parts[:-1]
942 parts.extend(real_parts)
961 parts.extend(real_parts)
943 return None, parts, offset
962 return None, parts, offset
944
963
945 offset += 1
964 offset += 1
946 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
965 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
947 offset += 1
966 offset += 1
948
967
949 if offset < len(s):
968 if offset < len(s):
950 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
969 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
951 parts[-1] += b'"'
970 parts[-1] += b'"'
952 offset += 1
971 offset += 1
953 else:
972 else:
954 parts.append(b'')
973 parts.append(b'')
955 else:
974 else:
956 return None, parts, offset
975 return None, parts, offset
957
976
958 return _parse_plain, parts, offset
977 return _parse_plain, parts, offset
959
978
960 def _configlist(s):
979 def _configlist(s):
961 s = s.rstrip(b' ,')
980 s = s.rstrip(b' ,')
962 if not s:
981 if not s:
963 return []
982 return []
964 parser, parts, offset = _parse_plain, [b''], 0
983 parser, parts, offset = _parse_plain, [b''], 0
965 while parser:
984 while parser:
966 parser, parts, offset = parser(parts, s, offset)
985 parser, parts, offset = parser(parts, s, offset)
967 return parts
986 return parts
968
987
969 if value is not None and isinstance(value, bytes):
988 if value is not None and isinstance(value, bytes):
970 result = _configlist(value.lstrip(b' ,\n'))
989 result = _configlist(value.lstrip(b' ,\n'))
971 else:
990 else:
972 result = value
991 result = value
973 return result or []
992 return result or []
974
993
975
994
976 def evalpythonliteral(s):
995 def evalpythonliteral(s: bytes):
977 """Evaluate a string containing a Python literal expression"""
996 """Evaluate a string containing a Python literal expression"""
978 # We could backport our tokenizer hack to rewrite '' to u'' if we want
997 # We could backport our tokenizer hack to rewrite '' to u'' if we want
979 return ast.literal_eval(s.decode('latin1'))
998 return ast.literal_eval(s.decode('latin1'))
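For callers, the practical effect of the new annotations is that type checkers can flag misuse of this bytes-first API, for example forgetting that parsebool() may return None. A hedged usage sketch against the signatures above, assuming the mercurial package is importable; the import path and sample values are illustrative:

    from typing import Optional

    # Import path assumes Mercurial's in-tree layout (mercurial/utils/stringutil.py).
    from mercurial.utils import stringutil

    value: Optional[bool] = stringutil.parsebool(b'on')  # True
    if value is None:
        # parsebool() returns None for unrecognized tokens (e.g. b'maybe'),
        # so the Optional[bool] hint makes unguarded use a checker error.
        raise ValueError('expected a boolean config value')

    assert stringutil.binary(b'ab\0cd')  # contains a NUL byte -> True
    assert stringutil.shortuser(b'Jane Doe <jane@example.com>') == b'jane'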