typing: add basic type hints to stringutil.py
Matt Harbison
r50470:bbbb5213 default
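
The core typing device in this changeset is typing.overload, used so that reescape() is seen as bytes-to-bytes for bytes input and str-to-str for str input. Below is a minimal, self-contained sketch of that same pattern; the double() function is an illustration only and does not appear in the changeset:

from typing import Union, overload


@overload
def double(value: bytes) -> bytes:
    ...


@overload
def double(value: str) -> str:
    ...


def double(value: Union[bytes, str]) -> Union[bytes, str]:
    # The overloads above tell a type checker that the return type matches
    # the argument type; this single implementation serves both cases.
    return value + value


assert double(b'ab') == b'abab'
assert double('xy') == 'xyxy'
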
@@ -1,979 +1,998 @@
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10
11 11 import ast
12 12 import codecs
13 13 import re as remod
14 14 import textwrap
15 15 import types
16 16
17 from typing import (
18 Optional,
19 overload,
20 )
21
17 22 from ..i18n import _
18 23 from ..thirdparty import attr
19 24
20 25 from .. import (
21 26 encoding,
22 27 error,
23 28 pycompat,
24 29 )
25 30
26 31 # regex special chars pulled from https://bugs.python.org/issue29995
27 32 # which was part of Python 3.7.
28 33 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
29 34 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30 35 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
31 36
32 37
38 @overload
39 def reescape(pat: bytes) -> bytes:
40 ...
41
42
43 @overload
44 def reescape(pat: str) -> str:
45 ...
46
47
33 48 def reescape(pat):
34 49 """Drop-in replacement for re.escape."""
35 50 # NOTE: it is intentional that this works on unicodes and not
36 51 # bytes, as it's only possible to do the escaping with
37 52 # unicode.translate, not bytes.translate. Sigh.
38 53 wantuni = True
39 54 if isinstance(pat, bytes):
40 55 wantuni = False
41 56 pat = pat.decode('latin1')
42 57 pat = pat.translate(_regexescapemap)
43 58 if wantuni:
44 59 return pat
45 60 return pat.encode('latin1')
46 61
47 62
48 def pprint(o, bprefix=False, indent=0, level=0):
63 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
49 64 """Pretty print an object."""
50 65 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
51 66
52 67
53 def pprintgen(o, bprefix=False, indent=0, level=0):
68 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
54 69 """Pretty print an object to a generator of atoms.
55 70
56 71 ``bprefix`` is a flag influencing whether bytestrings are preferred with
57 72 a ``b''`` prefix.
58 73
59 74 ``indent`` controls whether collections and nested data structures
60 75 span multiple lines via the indentation amount in spaces. By default,
61 76 no newlines are emitted.
62 77
63 78 ``level`` specifies the initial indent level. Used if ``indent > 0``.
64 79 """
65 80
66 81 if isinstance(o, bytes):
67 82 if bprefix:
68 83 yield b"b'%s'" % escapestr(o)
69 84 else:
70 85 yield b"'%s'" % escapestr(o)
71 86 elif isinstance(o, bytearray):
72 87 # codecs.escape_encode() can't handle bytearray, so escapestr fails
73 88 # without coercion.
74 89 yield b"bytearray['%s']" % escapestr(bytes(o))
75 90 elif isinstance(o, list):
76 91 if not o:
77 92 yield b'[]'
78 93 return
79 94
80 95 yield b'['
81 96
82 97 if indent:
83 98 level += 1
84 99 yield b'\n'
85 100 yield b' ' * (level * indent)
86 101
87 102 for i, a in enumerate(o):
88 103 for chunk in pprintgen(
89 104 a, bprefix=bprefix, indent=indent, level=level
90 105 ):
91 106 yield chunk
92 107
93 108 if i + 1 < len(o):
94 109 if indent:
95 110 yield b',\n'
96 111 yield b' ' * (level * indent)
97 112 else:
98 113 yield b', '
99 114
100 115 if indent:
101 116 level -= 1
102 117 yield b'\n'
103 118 yield b' ' * (level * indent)
104 119
105 120 yield b']'
106 121 elif isinstance(o, dict):
107 122 if not o:
108 123 yield b'{}'
109 124 return
110 125
111 126 yield b'{'
112 127
113 128 if indent:
114 129 level += 1
115 130 yield b'\n'
116 131 yield b' ' * (level * indent)
117 132
118 133 for i, (k, v) in enumerate(sorted(o.items())):
119 134 for chunk in pprintgen(
120 135 k, bprefix=bprefix, indent=indent, level=level
121 136 ):
122 137 yield chunk
123 138
124 139 yield b': '
125 140
126 141 for chunk in pprintgen(
127 142 v, bprefix=bprefix, indent=indent, level=level
128 143 ):
129 144 yield chunk
130 145
131 146 if i + 1 < len(o):
132 147 if indent:
133 148 yield b',\n'
134 149 yield b' ' * (level * indent)
135 150 else:
136 151 yield b', '
137 152
138 153 if indent:
139 154 level -= 1
140 155 yield b'\n'
141 156 yield b' ' * (level * indent)
142 157
143 158 yield b'}'
144 159 elif isinstance(o, set):
145 160 if not o:
146 161 yield b'set([])'
147 162 return
148 163
149 164 yield b'set(['
150 165
151 166 if indent:
152 167 level += 1
153 168 yield b'\n'
154 169 yield b' ' * (level * indent)
155 170
156 171 for i, k in enumerate(sorted(o)):
157 172 for chunk in pprintgen(
158 173 k, bprefix=bprefix, indent=indent, level=level
159 174 ):
160 175 yield chunk
161 176
162 177 if i + 1 < len(o):
163 178 if indent:
164 179 yield b',\n'
165 180 yield b' ' * (level * indent)
166 181 else:
167 182 yield b', '
168 183
169 184 if indent:
170 185 level -= 1
171 186 yield b'\n'
172 187 yield b' ' * (level * indent)
173 188
174 189 yield b'])'
175 190 elif isinstance(o, tuple):
176 191 if not o:
177 192 yield b'()'
178 193 return
179 194
180 195 yield b'('
181 196
182 197 if indent:
183 198 level += 1
184 199 yield b'\n'
185 200 yield b' ' * (level * indent)
186 201
187 202 for i, a in enumerate(o):
188 203 for chunk in pprintgen(
189 204 a, bprefix=bprefix, indent=indent, level=level
190 205 ):
191 206 yield chunk
192 207
193 208 if i + 1 < len(o):
194 209 if indent:
195 210 yield b',\n'
196 211 yield b' ' * (level * indent)
197 212 else:
198 213 yield b', '
199 214
200 215 if indent:
201 216 level -= 1
202 217 yield b'\n'
203 218 yield b' ' * (level * indent)
204 219
205 220 yield b')'
206 221 elif isinstance(o, types.GeneratorType):
207 222 # Special case of empty generator.
208 223 try:
209 224 nextitem = next(o)
210 225 except StopIteration:
211 226 yield b'gen[]'
212 227 return
213 228
214 229 yield b'gen['
215 230
216 231 if indent:
217 232 level += 1
218 233 yield b'\n'
219 234 yield b' ' * (level * indent)
220 235
221 236 last = False
222 237
223 238 while not last:
224 239 current = nextitem
225 240
226 241 try:
227 242 nextitem = next(o)
228 243 except StopIteration:
229 244 last = True
230 245
231 246 for chunk in pprintgen(
232 247 current, bprefix=bprefix, indent=indent, level=level
233 248 ):
234 249 yield chunk
235 250
236 251 if not last:
237 252 if indent:
238 253 yield b',\n'
239 254 yield b' ' * (level * indent)
240 255 else:
241 256 yield b', '
242 257
243 258 if indent:
244 259 level -= 1
245 260 yield b'\n'
246 261 yield b' ' * (level * indent)
247 262
248 263 yield b']'
249 264 else:
250 265 yield pycompat.byterepr(o)
251 266
252 267
253 def prettyrepr(o):
268 def prettyrepr(o) -> bytes:
254 269 """Pretty print a representation of a possibly-nested object"""
255 270 lines = []
256 271 rs = pycompat.byterepr(o)
257 272 p0 = p1 = 0
258 273 while p0 < len(rs):
259 274 # '... field=<type ... field=<type ...'
260 275 # ~~~~~~~~~~~~~~~~
261 276 # p0 p1 q0 q1
262 277 q0 = -1
263 278 q1 = rs.find(b'<', p1 + 1)
264 279 if q1 < 0:
265 280 q1 = len(rs)
266 281 # pytype: disable=wrong-arg-count
267 282 # TODO: figure out why pytype doesn't recognize the optional start
268 283 # arg
269 284 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
270 285 # pytype: enable=wrong-arg-count
271 286 # backtrack for ' field=<'
272 287 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
273 288 if q0 < 0:
274 289 q0 = q1
275 290 else:
276 291 q0 += 1 # skip ' '
277 292 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
278 293 assert l >= 0
279 294 lines.append((l, rs[p0:q0].rstrip()))
280 295 p0, p1 = q0, q1
281 296 return b'\n'.join(b' ' * l + s for l, s in lines)
282 297
283 298
284 def buildrepr(r):
299 def buildrepr(r) -> bytes:
285 300 """Format an optional printable representation from unexpanded bits
286 301
287 302 ======== =================================
288 303 type(r) example
289 304 ======== =================================
290 305 tuple ('<not %r>', other)
291 306 bytes '<branch closed>'
292 307 callable lambda: '<branch %r>' % sorted(b)
293 308 object other
294 309 ======== =================================
295 310 """
296 311 if r is None:
297 312 return b''
298 313 elif isinstance(r, tuple):
299 314 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
300 315 elif isinstance(r, bytes):
301 316 return r
302 317 elif callable(r):
303 318 return r()
304 319 else:
305 320 return pprint(r)
306 321
307 322
308 def binary(s):
323 def binary(s: bytes) -> bool:
309 324 """return true if a string is binary data"""
310 325 return bool(s and b'\0' in s)
311 326
312 327
313 def _splitpattern(pattern):
328 def _splitpattern(pattern: bytes):
314 329 if pattern.startswith(b're:'):
315 330 return b're', pattern[3:]
316 331 elif pattern.startswith(b'literal:'):
317 332 return b'literal', pattern[8:]
318 333 return b'literal', pattern
319 334
320 335
321 def stringmatcher(pattern, casesensitive=True):
336 def stringmatcher(pattern: bytes, casesensitive: bool = True):
322 337 """
323 338 accepts a string, possibly starting with 're:' or 'literal:' prefix.
324 339 returns the matcher name, pattern, and matcher function.
325 340 missing or unknown prefixes are treated as literal matches.
326 341
327 342 helper for tests:
328 343 >>> def test(pattern, *tests):
329 344 ... kind, pattern, matcher = stringmatcher(pattern)
330 345 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
331 346 >>> def itest(pattern, *tests):
332 347 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
333 348 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
334 349
335 350 exact matching (no prefix):
336 351 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
337 352 ('literal', 'abcdefg', [False, False, True])
338 353
339 354 regex matching ('re:' prefix)
340 355 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
341 356 ('re', 'a.+b', [False, False, True])
342 357
343 358 force exact matches ('literal:' prefix)
344 359 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
345 360 ('literal', 're:foobar', [False, True])
346 361
347 362 unknown prefixes are ignored and treated as literals
348 363 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
349 364 ('literal', 'foo:bar', [False, False, True])
350 365
351 366 case insensitive regex matches
352 367 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
353 368 ('re', 'A.+b', [False, False, True])
354 369
355 370 case insensitive literal matches
356 371 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
357 372 ('literal', 'ABCDEFG', [False, False, True])
358 373 """
359 374 kind, pattern = _splitpattern(pattern)
360 375 if kind == b're':
361 376 try:
362 377 flags = 0
363 378 if not casesensitive:
364 379 flags = remod.I
365 380 regex = remod.compile(pattern, flags)
366 381 except remod.error as e:
367 382 raise error.ParseError(
368 383 _(b'invalid regular expression: %s') % forcebytestr(e)
369 384 )
370 385 return kind, pattern, regex.search
371 386 elif kind == b'literal':
372 387 if casesensitive:
373 388 match = pattern.__eq__
374 389 else:
375 390 ipat = encoding.lower(pattern)
376 391 match = lambda s: ipat == encoding.lower(s)
377 392 return kind, pattern, match
378 393
379 394 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
380 395
381 396
382 def substringregexp(pattern, flags=0):
397 def substringregexp(pattern: bytes, flags: int = 0):
383 398 """Build a regexp object from a string pattern possibly starting with
384 399 're:' or 'literal:' prefix.
385 400
386 401 helper for tests:
387 402 >>> def test(pattern, *tests):
388 403 ... regexp = substringregexp(pattern)
389 404 ... return [bool(regexp.search(t)) for t in tests]
390 405 >>> def itest(pattern, *tests):
391 406 ... regexp = substringregexp(pattern, remod.I)
392 407 ... return [bool(regexp.search(t)) for t in tests]
393 408
394 409 substring matching (no prefix):
395 410 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
396 411 [False, False, True]
397 412
398 413 substring pattern should be escaped:
399 414 >>> substringregexp(b'.bc').pattern
400 415 '\\\\.bc'
401 416 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
402 417 [False, False, False]
403 418
404 419 regex matching ('re:' prefix)
405 420 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
406 421 [False, False, True]
407 422
408 423 force substring matches ('literal:' prefix)
409 424 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
410 425 [False, True]
411 426
412 427 case insensitive literal matches
413 428 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
414 429 [False, False, True]
415 430
416 431 case insensitive regex matches
417 432 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
418 433 [False, False, True]
419 434 """
420 435 kind, pattern = _splitpattern(pattern)
421 436 if kind == b're':
422 437 try:
423 438 return remod.compile(pattern, flags)
424 439 except remod.error as e:
425 440 raise error.ParseError(
426 441 _(b'invalid regular expression: %s') % forcebytestr(e)
427 442 )
428 443 elif kind == b'literal':
429 444 return remod.compile(remod.escape(pattern), flags)
430 445
431 446 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
432 447
433 448
434 def shortuser(user):
449 def shortuser(user: bytes) -> bytes:
435 450 """Return a short representation of a user name or email address."""
436 451 f = user.find(b'@')
437 452 if f >= 0:
438 453 user = user[:f]
439 454 f = user.find(b'<')
440 455 if f >= 0:
441 456 user = user[f + 1 :]
442 457 f = user.find(b' ')
443 458 if f >= 0:
444 459 user = user[:f]
445 460 f = user.find(b'.')
446 461 if f >= 0:
447 462 user = user[:f]
448 463 return user
449 464
450 465
451 def emailuser(user):
466 def emailuser(user: bytes) -> bytes:
452 467 """Return the user portion of an email address."""
453 468 f = user.find(b'@')
454 469 if f >= 0:
455 470 user = user[:f]
456 471 f = user.find(b'<')
457 472 if f >= 0:
458 473 user = user[f + 1 :]
459 474 return user
460 475
461 476
462 def email(author):
477 def email(author: bytes) -> bytes:
463 478 '''get email of author.'''
464 479 r = author.find(b'>')
465 480 if r == -1:
466 481 r = None
467 482 return author[author.find(b'<') + 1 : r]
468 483
469 484
470 def person(author):
485 def person(author: bytes) -> bytes:
471 486 """Returns the name before an email address,
472 487 interpreting it as per RFC 5322
473 488
474 489 >>> person(b'foo@bar')
475 490 'foo'
476 491 >>> person(b'Foo Bar <foo@bar>')
477 492 'Foo Bar'
478 493 >>> person(b'"Foo Bar" <foo@bar>')
479 494 'Foo Bar'
480 495 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
481 496 'Foo "buz" Bar'
482 497 >>> # The following are invalid, but do exist in real-life
483 498 ...
484 499 >>> person(b'Foo "buz" Bar <foo@bar>')
485 500 'Foo "buz" Bar'
486 501 >>> person(b'"Foo Bar <foo@bar>')
487 502 'Foo Bar'
488 503 """
489 504 if b'@' not in author:
490 505 return author
491 506 f = author.find(b'<')
492 507 if f != -1:
493 508 return author[:f].strip(b' "').replace(b'\\"', b'"')
494 509 f = author.find(b'@')
495 510 return author[:f].replace(b'.', b' ')
496 511
497 512
498 513 @attr.s(hash=True)
499 514 class mailmapping:
500 515 """Represents a username/email key or value in
501 516 a mailmap file"""
502 517
503 518 email = attr.ib()
504 519 name = attr.ib(default=None)
505 520
506 521
507 522 def _ismailmaplineinvalid(names, emails):
508 523 """Returns True if the parsed names and emails
509 524 in a mailmap entry are invalid.
510 525
511 526 >>> # No names or emails fails
512 527 >>> names, emails = [], []
513 528 >>> _ismailmaplineinvalid(names, emails)
514 529 True
515 530 >>> # Only one email fails
516 531 >>> emails = [b'email@email.com']
517 532 >>> _ismailmaplineinvalid(names, emails)
518 533 True
519 534 >>> # One email and one name passes
520 535 >>> names = [b'Test Name']
521 536 >>> _ismailmaplineinvalid(names, emails)
522 537 False
523 538 >>> # No names but two emails passes
524 539 >>> names = []
525 540 >>> emails = [b'proper@email.com', b'commit@email.com']
526 541 >>> _ismailmaplineinvalid(names, emails)
527 542 False
528 543 """
529 544 return not emails or not names and len(emails) < 2
530 545
531 546
532 547 def parsemailmap(mailmapcontent):
533 548 """Parses data in the .mailmap format
534 549
535 550 >>> mmdata = b"\\n".join([
536 551 ... b'# Comment',
537 552 ... b'Name <commit1@email.xx>',
538 553 ... b'<name@email.xx> <commit2@email.xx>',
539 554 ... b'Name <proper@email.xx> <commit3@email.xx>',
540 555 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
541 556 ... ])
542 557 >>> mm = parsemailmap(mmdata)
543 558 >>> for key in sorted(mm.keys()):
544 559 ... print(key)
545 560 mailmapping(email='commit1@email.xx', name=None)
546 561 mailmapping(email='commit2@email.xx', name=None)
547 562 mailmapping(email='commit3@email.xx', name=None)
548 563 mailmapping(email='commit4@email.xx', name='Commit')
549 564 >>> for val in sorted(mm.values()):
550 565 ... print(val)
551 566 mailmapping(email='commit1@email.xx', name='Name')
552 567 mailmapping(email='name@email.xx', name=None)
553 568 mailmapping(email='proper@email.xx', name='Name')
554 569 mailmapping(email='proper@email.xx', name='Name')
555 570 """
556 571 mailmap = {}
557 572
558 573 if mailmapcontent is None:
559 574 return mailmap
560 575
561 576 for line in mailmapcontent.splitlines():
562 577
563 578 # Don't bother checking the line if it is a comment or
564 579 # is an improperly formed author field
565 580 if line.lstrip().startswith(b'#'):
566 581 continue
567 582
568 583 # names, emails hold the parsed emails and names for each line
569 584 # namebuilder holds the words in a person's name
570 585 names, emails = [], []
571 586 namebuilder = []
572 587
573 588 for element in line.split():
574 589 if element.startswith(b'#'):
575 590 # If we reach a comment in the mailmap file, move on
576 591 break
577 592
578 593 elif element.startswith(b'<') and element.endswith(b'>'):
579 594 # We have found an email.
580 595 # Parse it, and finalize any names from earlier
581 596 emails.append(element[1:-1]) # Slice off the "<>"
582 597
583 598 if namebuilder:
584 599 names.append(b' '.join(namebuilder))
585 600 namebuilder = []
586 601
587 602 # Break if we have found a second email, any other
588 603 # data does not fit the spec for .mailmap
589 604 if len(emails) > 1:
590 605 break
591 606
592 607 else:
593 608 # We have found another word in the committer's name
594 609 namebuilder.append(element)
595 610
596 611 # Check to see if we have parsed the line into a valid form
597 612 # We require at least one email, and either at least one
598 613 # name or a second email
599 614 if _ismailmaplineinvalid(names, emails):
600 615 continue
601 616
602 617 mailmapkey = mailmapping(
603 618 email=emails[-1],
604 619 name=names[-1] if len(names) == 2 else None,
605 620 )
606 621
607 622 mailmap[mailmapkey] = mailmapping(
608 623 email=emails[0],
609 624 name=names[0] if names else None,
610 625 )
611 626
612 627 return mailmap
613 628
614 629
615 def mapname(mailmap, author):
630 def mapname(mailmap, author: bytes) -> bytes:
616 631 """Returns the author field according to the mailmap cache, or
617 632 the original author field.
618 633
619 634 >>> mmdata = b"\\n".join([
620 635 ... b'# Comment',
621 636 ... b'Name <commit1@email.xx>',
622 637 ... b'<name@email.xx> <commit2@email.xx>',
623 638 ... b'Name <proper@email.xx> <commit3@email.xx>',
624 639 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
625 640 ... ])
626 641 >>> m = parsemailmap(mmdata)
627 642 >>> mapname(m, b'Commit <commit1@email.xx>')
628 643 'Name <commit1@email.xx>'
629 644 >>> mapname(m, b'Name <commit2@email.xx>')
630 645 'Name <name@email.xx>'
631 646 >>> mapname(m, b'Commit <commit3@email.xx>')
632 647 'Name <proper@email.xx>'
633 648 >>> mapname(m, b'Commit <commit4@email.xx>')
634 649 'Name <proper@email.xx>'
635 650 >>> mapname(m, b'Unknown Name <unknown@email.com>')
636 651 'Unknown Name <unknown@email.com>'
637 652 """
638 653 # If the author field coming in isn't in the correct format,
639 654 # or the mailmap is empty, just return the original author field
640 655 if not isauthorwellformed(author) or not mailmap:
641 656 return author
642 657
643 658 # Turn the user name into a mailmapping
644 659 commit = mailmapping(name=person(author), email=email(author))
645 660
646 661 try:
647 662 # Try and use both the commit email and name as the key
648 663 proper = mailmap[commit]
649 664
650 665 except KeyError:
651 666 # If the lookup fails, use just the email as the key instead
652 667 # We call this commit2 so as not to erase the original commit fields
653 668 commit2 = mailmapping(email=commit.email)
654 669 proper = mailmap.get(commit2, mailmapping(None, None))
655 670
656 671 # Return the author field with proper values filled in
657 672 return b'%s <%s>' % (
658 673 proper.name if proper.name else commit.name,
659 674 proper.email if proper.email else commit.email,
660 675 )
661 676
662 677
663 678 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
664 679
665 680
666 def isauthorwellformed(author):
681 def isauthorwellformed(author: bytes) -> bool:
667 682 """Return True if the author field is well formed
668 683 (ie "Contributor Name <contrib@email.dom>")
669 684
670 685 >>> isauthorwellformed(b'Good Author <good@author.com>')
671 686 True
672 687 >>> isauthorwellformed(b'Author <good@author.com>')
673 688 True
674 689 >>> isauthorwellformed(b'Bad Author')
675 690 False
676 691 >>> isauthorwellformed(b'Bad Author <author@author.com')
677 692 False
678 693 >>> isauthorwellformed(b'Bad Author author@author.com')
679 694 False
680 695 >>> isauthorwellformed(b'<author@author.com>')
681 696 False
682 697 >>> isauthorwellformed(b'Bad Author <author>')
683 698 False
684 699 """
685 700 return _correctauthorformat.match(author) is not None
686 701
687 702
688 def firstline(text):
703 def firstline(text: bytes) -> bytes:
689 704 """Return the first line of the input"""
690 705 # Try to avoid running splitlines() on the whole string
691 706 i = text.find(b'\n')
692 707 if i != -1:
693 708 text = text[:i]
694 709 try:
695 710 return text.splitlines()[0]
696 711 except IndexError:
697 712 return b''
698 713
699 714
700 def ellipsis(text, maxlength=400):
715 def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
701 716 """Trim string to at most maxlength (default: 400) columns in display."""
702 717 return encoding.trim(text, maxlength, ellipsis=b'...')
703 718
704 719
705 def escapestr(s):
720 def escapestr(s: bytes) -> bytes:
721 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
706 722 if isinstance(s, memoryview):
707 723 s = bytes(s)
708 724 # call underlying function of s.encode('string_escape') directly for
709 725 # Python 3 compatibility
710 726 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
711 727
712 728
713 def unescapestr(s):
729 def unescapestr(s: bytes) -> bytes:
714 730 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
715 731
716 732
717 733 def forcebytestr(obj):
718 734 """Portably format an arbitrary object (e.g. exception) into a byte
719 735 string."""
720 736 try:
721 737 return pycompat.bytestr(obj)
722 738 except UnicodeEncodeError:
723 739 # non-ascii string, may be lossy
724 740 return pycompat.bytestr(encoding.strtolocal(str(obj)))
725 741
726 742
727 def uirepr(s):
743 def uirepr(s: bytes) -> bytes:
728 744 # Avoid double backslash in Windows path repr()
729 745 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
730 746
731 747
732 748 # delay import of textwrap
733 749 def _MBTextWrapper(**kwargs):
734 750 class tw(textwrap.TextWrapper):
735 751 """
736 752 Extend TextWrapper for width-awareness.
737 753
738 754 Neither the number of 'bytes' in any encoding nor the number of 'characters'
739 755 is appropriate for calculating terminal columns for the specified string.
740 756
741 757 The original TextWrapper implementation uses the built-in 'len()' directly,
742 758 so overriding is needed to use the width information of each character.
743 759
744 760 In addition, characters classified as 'ambiguous' width are
745 761 treated as wide in East Asian locales, but as narrow elsewhere.
746 762
747 763 This requires a user decision to determine the width of such characters.
748 764 """
749 765
750 766 def _cutdown(self, ucstr, space_left):
751 767 l = 0
752 768 colwidth = encoding.ucolwidth
753 769 for i in range(len(ucstr)):
754 770 l += colwidth(ucstr[i])
755 771 if space_left < l:
756 772 return (ucstr[:i], ucstr[i:])
757 773 return ucstr, b''
758 774
759 775 # overriding of base class
760 776 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
761 777 space_left = max(width - cur_len, 1)
762 778
763 779 if self.break_long_words:
764 780 cut, res = self._cutdown(reversed_chunks[-1], space_left)
765 781 cur_line.append(cut)
766 782 reversed_chunks[-1] = res
767 783 elif not cur_line:
768 784 cur_line.append(reversed_chunks.pop())
769 785
770 786 # this overriding code is imported from TextWrapper of Python 2.6
771 787 # to calculate columns of string by 'encoding.ucolwidth()'
772 788 def _wrap_chunks(self, chunks):
773 789 colwidth = encoding.ucolwidth
774 790
775 791 lines = []
776 792 if self.width <= 0:
777 793 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
778 794
779 795 # Arrange in reverse order so items can be efficiently popped
780 796 # from a stack of chunks.
781 797 chunks.reverse()
782 798
783 799 while chunks:
784 800
785 801 # Start the list of chunks that will make up the current line.
786 802 # cur_len is just the length of all the chunks in cur_line.
787 803 cur_line = []
788 804 cur_len = 0
789 805
790 806 # Figure out which static string will prefix this line.
791 807 if lines:
792 808 indent = self.subsequent_indent
793 809 else:
794 810 indent = self.initial_indent
795 811
796 812 # Maximum width for this line.
797 813 width = self.width - len(indent)
798 814
799 815 # First chunk on line is whitespace -- drop it, unless this
800 816 # is the very beginning of the text (i.e. no lines started yet).
801 817 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
802 818 del chunks[-1]
803 819
804 820 while chunks:
805 821 l = colwidth(chunks[-1])
806 822
807 823 # Can at least squeeze this chunk onto the current line.
808 824 if cur_len + l <= width:
809 825 cur_line.append(chunks.pop())
810 826 cur_len += l
811 827
812 828 # Nope, this line is full.
813 829 else:
814 830 break
815 831
816 832 # The current line is full, and the next chunk is too big to
817 833 # fit on *any* line (not just this one).
818 834 if chunks and colwidth(chunks[-1]) > width:
819 835 self._handle_long_word(chunks, cur_line, cur_len, width)
820 836
821 837 # If the last chunk on this line is all whitespace, drop it.
822 838 if (
823 839 self.drop_whitespace
824 840 and cur_line
825 841 and cur_line[-1].strip() == r''
826 842 ):
827 843 del cur_line[-1]
828 844
829 845 # Convert current line back to a string and store it in list
830 846 # of all lines (return value).
831 847 if cur_line:
832 848 lines.append(indent + ''.join(cur_line))
833 849
834 850 return lines
835 851
836 852 global _MBTextWrapper
837 853 _MBTextWrapper = tw
838 854 return tw(**kwargs)
839 855
840 856
841 def wrap(line, width, initindent=b'', hangindent=b''):
857 def wrap(
858 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
859 ) -> bytes:
842 860 maxindent = max(len(hangindent), len(initindent))
843 861 if width <= maxindent:
844 862 # adjust for weird terminal size
845 863 width = max(78, maxindent + 1)
846 864 line = line.decode(
847 865 pycompat.sysstr(encoding.encoding),
848 866 pycompat.sysstr(encoding.encodingmode),
849 867 )
850 868 initindent = initindent.decode(
851 869 pycompat.sysstr(encoding.encoding),
852 870 pycompat.sysstr(encoding.encodingmode),
853 871 )
854 872 hangindent = hangindent.decode(
855 873 pycompat.sysstr(encoding.encoding),
856 874 pycompat.sysstr(encoding.encodingmode),
857 875 )
858 876 wrapper = _MBTextWrapper(
859 877 width=width, initial_indent=initindent, subsequent_indent=hangindent
860 878 )
861 879 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
862 880
863 881
864 882 _booleans = {
865 883 b'1': True,
866 884 b'yes': True,
867 885 b'true': True,
868 886 b'on': True,
869 887 b'always': True,
870 888 b'0': False,
871 889 b'no': False,
872 890 b'false': False,
873 891 b'off': False,
874 892 b'never': False,
875 893 }
876 894
877 895
878 def parsebool(s):
896 def parsebool(s: bytes) -> Optional[bool]:
879 897 """Parse s into a boolean.
880 898
881 899 If s is not a valid boolean, returns None.
882 900 """
883 901 return _booleans.get(s.lower(), None)
884 902
885 903
886 def parselist(value):
904 # TODO: make arg mandatory (and fix code below?)
905 def parselist(value: Optional[bytes]):
887 906 """parse a configuration value as a list of comma/space separated strings
888 907
889 908 >>> parselist(b'this,is "a small" ,test')
890 909 ['this', 'is', 'a small', 'test']
891 910 """
892 911
893 912 def _parse_plain(parts, s, offset):
894 913 whitespace = False
895 914 while offset < len(s) and (
896 915 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
897 916 ):
898 917 whitespace = True
899 918 offset += 1
900 919 if offset >= len(s):
901 920 return None, parts, offset
902 921 if whitespace:
903 922 parts.append(b'')
904 923 if s[offset : offset + 1] == b'"' and not parts[-1]:
905 924 return _parse_quote, parts, offset + 1
906 925 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
907 926 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
908 927 return _parse_plain, parts, offset + 1
909 928 parts[-1] += s[offset : offset + 1]
910 929 return _parse_plain, parts, offset + 1
911 930
912 931 def _parse_quote(parts, s, offset):
913 932 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
914 933 parts.append(b'')
915 934 offset += 1
916 935 while offset < len(s) and (
917 936 s[offset : offset + 1].isspace()
918 937 or s[offset : offset + 1] == b','
919 938 ):
920 939 offset += 1
921 940 return _parse_plain, parts, offset
922 941
923 942 while offset < len(s) and s[offset : offset + 1] != b'"':
924 943 if (
925 944 s[offset : offset + 1] == b'\\'
926 945 and offset + 1 < len(s)
927 946 and s[offset + 1 : offset + 2] == b'"'
928 947 ):
929 948 offset += 1
930 949 parts[-1] += b'"'
931 950 else:
932 951 parts[-1] += s[offset : offset + 1]
933 952 offset += 1
934 953
935 954 if offset >= len(s):
936 955 real_parts = _configlist(parts[-1])
937 956 if not real_parts:
938 957 parts[-1] = b'"'
939 958 else:
940 959 real_parts[0] = b'"' + real_parts[0]
941 960 parts = parts[:-1]
942 961 parts.extend(real_parts)
943 962 return None, parts, offset
944 963
945 964 offset += 1
946 965 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
947 966 offset += 1
948 967
949 968 if offset < len(s):
950 969 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
951 970 parts[-1] += b'"'
952 971 offset += 1
953 972 else:
954 973 parts.append(b'')
955 974 else:
956 975 return None, parts, offset
957 976
958 977 return _parse_plain, parts, offset
959 978
960 979 def _configlist(s):
961 980 s = s.rstrip(b' ,')
962 981 if not s:
963 982 return []
964 983 parser, parts, offset = _parse_plain, [b''], 0
965 984 while parser:
966 985 parser, parts, offset = parser(parts, s, offset)
967 986 return parts
968 987
969 988 if value is not None and isinstance(value, bytes):
970 989 result = _configlist(value.lstrip(b' ,\n'))
971 990 else:
972 991 result = value
973 992 return result or []
974 993
975 994
976 def evalpythonliteral(s):
995 def evalpythonliteral(s: bytes):
977 996 """Evaluate a string containing a Python literal expression"""
978 997 # We could backport our tokenizer hack to rewrite '' to u'' if we want
979 998 return ast.literal_eval(s.decode('latin1'))
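
The Optional annotations added above change what callers can rely on: parsebool() now advertises that unrecognized strings produce None, and parselist() still tolerates a None argument (per the TODO in the diff). A small usage sketch of those two helpers follows; the read_flag() wrapper is purely illustrative, and the example assumes Mercurial is importable as the mercurial package (module path mercurial.utils.stringutil):

from mercurial.utils import stringutil


def read_flag(raw: bytes, default: bool = False) -> bool:
    # parsebool() is annotated as returning Optional[bool]: strings that are
    # not recognized booleans yield None, so the caller picks the fallback.
    value = stringutil.parsebool(raw)
    return default if value is None else value


assert read_flag(b'on') is True
assert read_flag(b'never') is False
assert read_flag(b'maybe', default=True) is True  # unrecognized -> default

# parselist() returns bytes items; this mirrors the doctest in the diff.
assert stringutil.parselist(b'this,is "a small" ,test') == [
    b'this',
    b'is',
    b'a small',
    b'test',
]
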