utils: accept bytearray arguments for escapestr
Joerg Sonnenberger
r52725:4eccb65e default
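A minimal sketch of what this change enables (not part of the changeset; it assumes a Mercurial checkout or install is on sys.path so that mercurial.utils.stringutil is importable). With the coercion added below, escapestr() accepts a bytearray the same way it already accepted a memoryview, converting it to bytes before calling codecs.escape_encode():

# Sketch only, not part of the changeset; assumes mercurial.utils.stringutil
# is importable from a Mercurial checkout or install.
from mercurial.utils import stringutil

data = bytearray(b'tab\there\nnewline')

# Before this change only memoryview was coerced, and codecs.escape_encode()
# cannot handle bytearray, so escapestr() failed on it without manual coercion.
escaped = stringutil.escapestr(data)                 # now accepted directly
assert escaped == stringutil.escapestr(bytes(data))  # the old workaround
print(escaped)                                       # b'tab\\there\\nnewline'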
@@ -1,1006 +1,1006 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10
11 11 import ast
12 12 import codecs
13 13 import re as remod
14 14 import textwrap
15 15 import types
16 16 import typing
17 17
18 18 from typing import (
19 19 Optional,
20 20 overload,
21 21 )
22 22
23 23 from ..i18n import _
24 24 from ..thirdparty import attr
25 25
26 26 # Force pytype to use the non-vendored package
27 27 if typing.TYPE_CHECKING:
28 28 # noinspection PyPackageRequirements
29 29 import attr
30 30
31 31 from .. import (
32 32 encoding,
33 33 error,
34 34 pycompat,
35 35 )
36 36
37 37 # regex special chars pulled from https://bugs.python.org/issue29995
38 38 # which was part of Python 3.7.
39 39 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
40 40 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
41 41 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
42 42
43 43
44 44 @overload
45 45 def reescape(pat: bytes) -> bytes:
46 46 ...
47 47
48 48
49 49 @overload
50 50 def reescape(pat: str) -> str:
51 51 ...
52 52
53 53
54 54 def reescape(pat):
55 55 """Drop-in replacement for re.escape."""
56 56 # NOTE: it is intentional that this works on unicodes and not
57 57 # bytes, as it's only possible to do the escaping with
58 58 # unicode.translate, not bytes.translate. Sigh.
59 59 wantuni = True
60 60 if isinstance(pat, bytes):
61 61 wantuni = False
62 62 pat = pat.decode('latin1')
63 63 pat = pat.translate(_regexescapemap)
64 64 if wantuni:
65 65 return pat
66 66 return pat.encode('latin1')
67 67
68 68
69 69 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
70 70 """Pretty print an object."""
71 71 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
72 72
73 73
74 74 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
75 75 """Pretty print an object to a generator of atoms.
76 76
77 77 ``bprefix`` is a flag influencing whether bytestrings are preferred with
78 78 a ``b''`` prefix.
79 79
80 80 ``indent`` controls whether collections and nested data structures
81 81 span multiple lines via the indentation amount in spaces. By default,
82 82 no newlines are emitted.
83 83
84 84 ``level`` specifies the initial indent level. Used if ``indent > 0``.
85 85 """
86 86
87 87 if isinstance(o, bytes):
88 88 if bprefix:
89 89 yield b"b'%s'" % escapestr(o)
90 90 else:
91 91 yield b"'%s'" % escapestr(o)
92 92 elif isinstance(o, bytearray):
93 93 # codecs.escape_encode() can't handle bytearray, so escapestr fails
94 94 # without coercion.
95 95 yield b"bytearray['%s']" % escapestr(bytes(o))
96 96 elif isinstance(o, list):
97 97 if not o:
98 98 yield b'[]'
99 99 return
100 100
101 101 yield b'['
102 102
103 103 if indent:
104 104 level += 1
105 105 yield b'\n'
106 106 yield b' ' * (level * indent)
107 107
108 108 for i, a in enumerate(o):
109 109 for chunk in pprintgen(
110 110 a, bprefix=bprefix, indent=indent, level=level
111 111 ):
112 112 yield chunk
113 113
114 114 if i + 1 < len(o):
115 115 if indent:
116 116 yield b',\n'
117 117 yield b' ' * (level * indent)
118 118 else:
119 119 yield b', '
120 120
121 121 if indent:
122 122 level -= 1
123 123 yield b'\n'
124 124 yield b' ' * (level * indent)
125 125
126 126 yield b']'
127 127 elif isinstance(o, dict):
128 128 if not o:
129 129 yield b'{}'
130 130 return
131 131
132 132 yield b'{'
133 133
134 134 if indent:
135 135 level += 1
136 136 yield b'\n'
137 137 yield b' ' * (level * indent)
138 138
139 139 for i, (k, v) in enumerate(sorted(o.items())):
140 140 for chunk in pprintgen(
141 141 k, bprefix=bprefix, indent=indent, level=level
142 142 ):
143 143 yield chunk
144 144
145 145 yield b': '
146 146
147 147 for chunk in pprintgen(
148 148 v, bprefix=bprefix, indent=indent, level=level
149 149 ):
150 150 yield chunk
151 151
152 152 if i + 1 < len(o):
153 153 if indent:
154 154 yield b',\n'
155 155 yield b' ' * (level * indent)
156 156 else:
157 157 yield b', '
158 158
159 159 if indent:
160 160 level -= 1
161 161 yield b'\n'
162 162 yield b' ' * (level * indent)
163 163
164 164 yield b'}'
165 165 elif isinstance(o, set):
166 166 if not o:
167 167 yield b'set([])'
168 168 return
169 169
170 170 yield b'set(['
171 171
172 172 if indent:
173 173 level += 1
174 174 yield b'\n'
175 175 yield b' ' * (level * indent)
176 176
177 177 for i, k in enumerate(sorted(o)):
178 178 for chunk in pprintgen(
179 179 k, bprefix=bprefix, indent=indent, level=level
180 180 ):
181 181 yield chunk
182 182
183 183 if i + 1 < len(o):
184 184 if indent:
185 185 yield b',\n'
186 186 yield b' ' * (level * indent)
187 187 else:
188 188 yield b', '
189 189
190 190 if indent:
191 191 level -= 1
192 192 yield b'\n'
193 193 yield b' ' * (level * indent)
194 194
195 195 yield b'])'
196 196 elif isinstance(o, tuple):
197 197 if not o:
198 198 yield b'()'
199 199 return
200 200
201 201 yield b'('
202 202
203 203 if indent:
204 204 level += 1
205 205 yield b'\n'
206 206 yield b' ' * (level * indent)
207 207
208 208 for i, a in enumerate(o):
209 209 for chunk in pprintgen(
210 210 a, bprefix=bprefix, indent=indent, level=level
211 211 ):
212 212 yield chunk
213 213
214 214 if i + 1 < len(o):
215 215 if indent:
216 216 yield b',\n'
217 217 yield b' ' * (level * indent)
218 218 else:
219 219 yield b', '
220 220
221 221 if indent:
222 222 level -= 1
223 223 yield b'\n'
224 224 yield b' ' * (level * indent)
225 225
226 226 yield b')'
227 227 elif isinstance(o, types.GeneratorType):
228 228 # Special case of empty generator.
229 229 try:
230 230 nextitem = next(o)
231 231 except StopIteration:
232 232 yield b'gen[]'
233 233 return
234 234
235 235 yield b'gen['
236 236
237 237 if indent:
238 238 level += 1
239 239 yield b'\n'
240 240 yield b' ' * (level * indent)
241 241
242 242 last = False
243 243
244 244 while not last:
245 245 current = nextitem
246 246
247 247 try:
248 248 nextitem = next(o)
249 249 except StopIteration:
250 250 last = True
251 251
252 252 for chunk in pprintgen(
253 253 current, bprefix=bprefix, indent=indent, level=level
254 254 ):
255 255 yield chunk
256 256
257 257 if not last:
258 258 if indent:
259 259 yield b',\n'
260 260 yield b' ' * (level * indent)
261 261 else:
262 262 yield b', '
263 263
264 264 if indent:
265 265 level -= 1
266 266 yield b'\n'
267 267 yield b' ' * (level * indent)
268 268
269 269 yield b']'
270 270 else:
271 271 yield pycompat.byterepr(o)
272 272
273 273
274 274 def prettyrepr(o) -> bytes:
275 275 """Pretty print a representation of a possibly-nested object"""
276 276 lines = []
277 277 rs = pycompat.byterepr(o)
278 278 p0 = p1 = 0
279 279 while p0 < len(rs):
280 280 # '... field=<type ... field=<type ...'
281 281 # ~~~~~~~~~~~~~~~~
282 282 # p0 p1 q0 q1
283 283 q0 = -1
284 284 q1 = rs.find(b'<', p1 + 1)
285 285 if q1 < 0:
286 286 q1 = len(rs)
287 287 # pytype: disable=wrong-arg-count
288 288 # TODO: figure out why pytype doesn't recognize the optional start
289 289 # arg
290 290 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
291 291 # pytype: enable=wrong-arg-count
292 292 # backtrack for ' field=<'
293 293 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
294 294 if q0 < 0:
295 295 q0 = q1
296 296 else:
297 297 q0 += 1 # skip ' '
298 298 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
299 299 assert l >= 0
300 300 lines.append((l, rs[p0:q0].rstrip()))
301 301 p0, p1 = q0, q1
302 302 return b'\n'.join(b' ' * l + s for l, s in lines)
303 303
304 304
305 305 def buildrepr(r) -> bytes:
306 306 """Format an optional printable representation from unexpanded bits
307 307
308 308 ======== =================================
309 309 type(r) example
310 310 ======== =================================
311 311 tuple ('<not %r>', other)
312 312 bytes '<branch closed>'
313 313 callable lambda: '<branch %r>' % sorted(b)
314 314 object other
315 315 ======== =================================
316 316 """
317 317 if r is None:
318 318 return b''
319 319 elif isinstance(r, tuple):
320 320 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
321 321 elif isinstance(r, bytes):
322 322 return r
323 323 elif callable(r):
324 324 return r()
325 325 else:
326 326 return pprint(r)
327 327
328 328
329 329 def binary(s: bytes) -> bool:
330 330 """return true if a string is binary data"""
331 331 return bool(s and b'\0' in s)
332 332
333 333
334 334 def _splitpattern(pattern: bytes):
335 335 if pattern.startswith(b're:'):
336 336 return b're', pattern[3:]
337 337 elif pattern.startswith(b'literal:'):
338 338 return b'literal', pattern[8:]
339 339 return b'literal', pattern
340 340
341 341
342 342 def stringmatcher(pattern: bytes, casesensitive: bool = True):
343 343 """
344 344 accepts a string, possibly starting with 're:' or 'literal:' prefix.
345 345 returns the matcher name, pattern, and matcher function.
346 346 missing or unknown prefixes are treated as literal matches.
347 347
348 348 helper for tests:
349 349 >>> def test(pattern, *tests):
350 350 ... kind, pattern, matcher = stringmatcher(pattern)
351 351 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
352 352 >>> def itest(pattern, *tests):
353 353 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
354 354 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
355 355
356 356 exact matching (no prefix):
357 357 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
358 358 ('literal', 'abcdefg', [False, False, True])
359 359
360 360 regex matching ('re:' prefix)
361 361 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
362 362 ('re', 'a.+b', [False, False, True])
363 363
364 364 force exact matches ('literal:' prefix)
365 365 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
366 366 ('literal', 're:foobar', [False, True])
367 367
368 368 unknown prefixes are ignored and treated as literals
369 369 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
370 370 ('literal', 'foo:bar', [False, False, True])
371 371
372 372 case insensitive regex matches
373 373 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
374 374 ('re', 'A.+b', [False, False, True])
375 375
376 376 case insensitive literal matches
377 377 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
378 378 ('literal', 'ABCDEFG', [False, False, True])
379 379 """
380 380 kind, pattern = _splitpattern(pattern)
381 381 if kind == b're':
382 382 try:
383 383 flags = 0
384 384 if not casesensitive:
385 385 flags = remod.I
386 386 regex = remod.compile(pattern, flags)
387 387 except remod.error as e:
388 388 raise error.ParseError(
389 389 _(b'invalid regular expression: %s') % forcebytestr(e)
390 390 )
391 391 return kind, pattern, regex.search
392 392 elif kind == b'literal':
393 393 if casesensitive:
394 394 match = pattern.__eq__
395 395 else:
396 396 ipat = encoding.lower(pattern)
397 397 match = lambda s: ipat == encoding.lower(s)
398 398 return kind, pattern, match
399 399
400 400 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
401 401
402 402
403 403 def substringregexp(pattern: bytes, flags: int = 0):
404 404 """Build a regexp object from a string pattern possibly starting with
405 405 're:' or 'literal:' prefix.
406 406
407 407 helper for tests:
408 408 >>> def test(pattern, *tests):
409 409 ... regexp = substringregexp(pattern)
410 410 ... return [bool(regexp.search(t)) for t in tests]
411 411 >>> def itest(pattern, *tests):
412 412 ... regexp = substringregexp(pattern, remod.I)
413 413 ... return [bool(regexp.search(t)) for t in tests]
414 414
415 415 substring matching (no prefix):
416 416 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
417 417 [False, False, True]
418 418
419 419 substring pattern should be escaped:
420 420 >>> substringregexp(b'.bc').pattern
421 421 '\\\\.bc'
422 422 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
423 423 [False, False, False]
424 424
425 425 regex matching ('re:' prefix)
426 426 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
427 427 [False, False, True]
428 428
429 429 force substring matches ('literal:' prefix)
430 430 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
431 431 [False, True]
432 432
433 433 case insensitive literal matches
434 434 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
435 435 [False, False, True]
436 436
437 437 case insensitive regex matches
438 438 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
439 439 [False, False, True]
440 440 """
441 441 kind, pattern = _splitpattern(pattern)
442 442 if kind == b're':
443 443 try:
444 444 return remod.compile(pattern, flags)
445 445 except remod.error as e:
446 446 raise error.ParseError(
447 447 _(b'invalid regular expression: %s') % forcebytestr(e)
448 448 )
449 449 elif kind == b'literal':
450 450 return remod.compile(remod.escape(pattern), flags)
451 451
452 452 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
453 453
454 454
455 455 def shortuser(user: bytes) -> bytes:
456 456 """Return a short representation of a user name or email address."""
457 457 f = user.find(b'@')
458 458 if f >= 0:
459 459 user = user[:f]
460 460 f = user.find(b'<')
461 461 if f >= 0:
462 462 user = user[f + 1 :]
463 463 f = user.find(b' ')
464 464 if f >= 0:
465 465 user = user[:f]
466 466 f = user.find(b'.')
467 467 if f >= 0:
468 468 user = user[:f]
469 469 return user
470 470
471 471
472 472 def emailuser(user: bytes) -> bytes:
473 473 """Return the user portion of an email address."""
474 474 f = user.find(b'@')
475 475 if f >= 0:
476 476 user = user[:f]
477 477 f = user.find(b'<')
478 478 if f >= 0:
479 479 user = user[f + 1 :]
480 480 return user
481 481
482 482
483 483 def email(author: bytes) -> bytes:
484 484 '''get email of author.'''
485 485 r = author.find(b'>')
486 486 if r == -1:
487 487 r = None
488 488 return author[author.find(b'<') + 1 : r]
489 489
490 490
491 491 def person(author: bytes) -> bytes:
492 492 """Returns the name before an email address,
493 493 interpreting it as per RFC 5322
494 494
495 495 >>> person(b'foo@bar')
496 496 'foo'
497 497 >>> person(b'Foo Bar <foo@bar>')
498 498 'Foo Bar'
499 499 >>> person(b'"Foo Bar" <foo@bar>')
500 500 'Foo Bar'
501 501 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
502 502 'Foo "buz" Bar'
503 503 >>> # The following are invalid, but do exist in real-life
504 504 ...
505 505 >>> person(b'Foo "buz" Bar <foo@bar>')
506 506 'Foo "buz" Bar'
507 507 >>> person(b'"Foo Bar <foo@bar>')
508 508 'Foo Bar'
509 509 """
510 510 if b'@' not in author:
511 511 return author
512 512 f = author.find(b'<')
513 513 if f != -1:
514 514 return author[:f].strip(b' "').replace(b'\\"', b'"')
515 515 f = author.find(b'@')
516 516 return author[:f].replace(b'.', b' ')
517 517
518 518
519 519 @attr.s(hash=True)
520 520 class mailmapping:
521 521 """Represents a username/email key or value in
522 522 a mailmap file"""
523 523
524 524 email = attr.ib()
525 525 name = attr.ib(default=None)
526 526
527 527
528 528 def _ismailmaplineinvalid(names, emails):
529 529 """Returns True if the parsed names and emails
530 530 in a mailmap entry are invalid.
531 531
532 532 >>> # No names or emails fails
533 533 >>> names, emails = [], []
534 534 >>> _ismailmaplineinvalid(names, emails)
535 535 True
536 536 >>> # Only one email fails
537 537 >>> emails = [b'email@email.com']
538 538 >>> _ismailmaplineinvalid(names, emails)
539 539 True
540 540 >>> # One email and one name passes
541 541 >>> names = [b'Test Name']
542 542 >>> _ismailmaplineinvalid(names, emails)
543 543 False
544 544 >>> # No names but two emails passes
545 545 >>> names = []
546 546 >>> emails = [b'proper@email.com', b'commit@email.com']
547 547 >>> _ismailmaplineinvalid(names, emails)
548 548 False
549 549 """
550 550 return not emails or not names and len(emails) < 2
551 551
552 552
553 553 def parsemailmap(mailmapcontent):
554 554 """Parses data in the .mailmap format
555 555
556 556 >>> mmdata = b"\\n".join([
557 557 ... b'# Comment',
558 558 ... b'Name <commit1@email.xx>',
559 559 ... b'<name@email.xx> <commit2@email.xx>',
560 560 ... b'Name <proper@email.xx> <commit3@email.xx>',
561 561 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
562 562 ... ])
563 563 >>> mm = parsemailmap(mmdata)
564 564 >>> for key in sorted(mm.keys()):
565 565 ... print(key)
566 566 mailmapping(email='commit1@email.xx', name=None)
567 567 mailmapping(email='commit2@email.xx', name=None)
568 568 mailmapping(email='commit3@email.xx', name=None)
569 569 mailmapping(email='commit4@email.xx', name='Commit')
570 570 >>> for val in sorted(mm.values()):
571 571 ... print(val)
572 572 mailmapping(email='commit1@email.xx', name='Name')
573 573 mailmapping(email='name@email.xx', name=None)
574 574 mailmapping(email='proper@email.xx', name='Name')
575 575 mailmapping(email='proper@email.xx', name='Name')
576 576 """
577 577 mailmap = {}
578 578
579 579 if mailmapcontent is None:
580 580 return mailmap
581 581
582 582 for line in mailmapcontent.splitlines():
583 583 # Don't bother checking the line if it is a comment or
584 584 # is an improperly formed author field
585 585 if line.lstrip().startswith(b'#'):
586 586 continue
587 587
588 588 # names, emails hold the parsed emails and names for each line
589 589         # namebuilder holds the words in a person's name
590 590 names, emails = [], []
591 591 namebuilder = []
592 592
593 593 for element in line.split():
594 594 if element.startswith(b'#'):
595 595 # If we reach a comment in the mailmap file, move on
596 596 break
597 597
598 598 elif element.startswith(b'<') and element.endswith(b'>'):
599 599 # We have found an email.
600 600 # Parse it, and finalize any names from earlier
601 601 emails.append(element[1:-1]) # Slice off the "<>"
602 602
603 603 if namebuilder:
604 604 names.append(b' '.join(namebuilder))
605 605 namebuilder = []
606 606
607 607 # Break if we have found a second email, any other
608 608 # data does not fit the spec for .mailmap
609 609 if len(emails) > 1:
610 610 break
611 611
612 612 else:
613 613                 # We have found another word in the committer's name
614 614 namebuilder.append(element)
615 615
616 616 # Check to see if we have parsed the line into a valid form
617 617 # We require at least one email, and either at least one
618 618 # name or a second email
619 619 if _ismailmaplineinvalid(names, emails):
620 620 continue
621 621
622 622 mailmapkey = mailmapping(
623 623 email=emails[-1],
624 624 name=names[-1] if len(names) == 2 else None,
625 625 )
626 626
627 627 mailmap[mailmapkey] = mailmapping(
628 628 email=emails[0],
629 629 name=names[0] if names else None,
630 630 )
631 631
632 632 return mailmap
633 633
634 634
635 635 def mapname(mailmap, author: bytes) -> bytes:
636 636 """Returns the author field according to the mailmap cache, or
637 637 the original author field.
638 638
639 639 >>> mmdata = b"\\n".join([
640 640 ... b'# Comment',
641 641 ... b'Name <commit1@email.xx>',
642 642 ... b'<name@email.xx> <commit2@email.xx>',
643 643 ... b'Name <proper@email.xx> <commit3@email.xx>',
644 644 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
645 645 ... ])
646 646 >>> m = parsemailmap(mmdata)
647 647 >>> mapname(m, b'Commit <commit1@email.xx>')
648 648 'Name <commit1@email.xx>'
649 649 >>> mapname(m, b'Name <commit2@email.xx>')
650 650 'Name <name@email.xx>'
651 651 >>> mapname(m, b'Commit <commit3@email.xx>')
652 652 'Name <proper@email.xx>'
653 653 >>> mapname(m, b'Commit <commit4@email.xx>')
654 654 'Name <proper@email.xx>'
655 655 >>> mapname(m, b'Unknown Name <unknown@email.com>')
656 656 'Unknown Name <unknown@email.com>'
657 657 """
658 658 # If the author field coming in isn't in the correct format,
659 659 # or the mailmap is empty just return the original author field
660 660 if not isauthorwellformed(author) or not mailmap:
661 661 return author
662 662
663 663 # Turn the user name into a mailmapping
664 664 commit = mailmapping(name=person(author), email=email(author))
665 665
666 666 try:
667 667 # Try and use both the commit email and name as the key
668 668 proper = mailmap[commit]
669 669
670 670 except KeyError:
671 671 # If the lookup fails, use just the email as the key instead
672 672         # We call this commit2 so as not to erase the original commit fields
673 673 commit2 = mailmapping(email=commit.email)
674 674 proper = mailmap.get(commit2, mailmapping(None, None))
675 675
676 676 # Return the author field with proper values filled in
677 677 return b'%s <%s>' % (
678 678 proper.name if proper.name else commit.name,
679 679 proper.email if proper.email else commit.email,
680 680 )
681 681
682 682
683 683 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
684 684
685 685
686 686 def isauthorwellformed(author: bytes) -> bool:
687 687 """Return True if the author field is well formed
688 688 (ie "Contributor Name <contrib@email.dom>")
689 689
690 690 >>> isauthorwellformed(b'Good Author <good@author.com>')
691 691 True
692 692 >>> isauthorwellformed(b'Author <good@author.com>')
693 693 True
694 694 >>> isauthorwellformed(b'Bad Author')
695 695 False
696 696 >>> isauthorwellformed(b'Bad Author <author@author.com')
697 697 False
698 698 >>> isauthorwellformed(b'Bad Author author@author.com')
699 699 False
700 700 >>> isauthorwellformed(b'<author@author.com>')
701 701 False
702 702 >>> isauthorwellformed(b'Bad Author <author>')
703 703 False
704 704 """
705 705 return _correctauthorformat.match(author) is not None
706 706
707 707
708 708 def firstline(text: bytes) -> bytes:
709 709 """Return the first line of the input"""
710 710 # Try to avoid running splitlines() on the whole string
711 711 i = text.find(b'\n')
712 712 if i != -1:
713 713 text = text[:i]
714 714 try:
715 715 return text.splitlines()[0]
716 716 except IndexError:
717 717 return b''
718 718
719 719
720 720 def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
721 721 """Trim string to at most maxlength (default: 400) columns in display."""
722 722 return encoding.trim(text, maxlength, ellipsis=b'...')
723 723
724 724
725 725 def escapestr(s: bytes) -> bytes:
726 726 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
727 if isinstance(s, memoryview):
727 if isinstance(s, (memoryview, bytearray)):
728 728 s = bytes(s)
729 729 # call underlying function of s.encode('string_escape') directly for
730 730 # Python 3 compatibility
731 731 # pytype: disable=bad-return-type
732 732 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
733 733 # pytype: enable=bad-return-type
734 734
735 735
736 736 def unescapestr(s: bytes) -> bytes:
737 737 # pytype: disable=bad-return-type
738 738 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
739 739 # pytype: enable=bad-return-type
740 740
741 741
742 742 def forcebytestr(obj):
743 743 """Portably format an arbitrary object (e.g. exception) into a byte
744 744 string."""
745 745 try:
746 746 return pycompat.bytestr(obj)
747 747 except UnicodeEncodeError:
748 748 # non-ascii string, may be lossy
749 749 return pycompat.bytestr(encoding.strtolocal(str(obj)))
750 750
751 751
752 752 def uirepr(s: bytes) -> bytes:
753 753 # Avoid double backslash in Windows path repr()
754 754 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
755 755
756 756
757 757 # delay import of textwrap
758 758 def _MBTextWrapper(**kwargs):
759 759 class tw(textwrap.TextWrapper):
760 760 """
761 761 Extend TextWrapper for width-awareness.
762 762
763 763         Neither the number of 'bytes' in any encoding nor the number of
764 764         'characters' is appropriate for calculating terminal columns for the specified string.
765 765
766 766         The original TextWrapper implementation uses the built-in 'len()' directly,
767 767         so overriding is needed to use the width information of each character.
768 768
769 769 In addition, characters classified into 'ambiguous' width are
770 770 treated as wide in East Asian area, but as narrow in other.
771 771
772 772         This requires a user decision to determine the width of such characters.
773 773 """
774 774
775 775 def _cutdown(self, ucstr, space_left):
776 776 l = 0
777 777 colwidth = encoding.ucolwidth
778 778 for i in range(len(ucstr)):
779 779 l += colwidth(ucstr[i])
780 780 if space_left < l:
781 781 return (ucstr[:i], ucstr[i:])
782 782 return ucstr, b''
783 783
784 784 # overriding of base class
785 785 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
786 786 space_left = max(width - cur_len, 1)
787 787
788 788 if self.break_long_words:
789 789 cut, res = self._cutdown(reversed_chunks[-1], space_left)
790 790 cur_line.append(cut)
791 791 reversed_chunks[-1] = res
792 792 elif not cur_line:
793 793 cur_line.append(reversed_chunks.pop())
794 794
795 795 # this overriding code is imported from TextWrapper of Python 2.6
796 796 # to calculate columns of string by 'encoding.ucolwidth()'
797 797 def _wrap_chunks(self, chunks):
798 798 colwidth = encoding.ucolwidth
799 799
800 800 lines = []
801 801 if self.width <= 0:
802 802 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
803 803
804 804 # Arrange in reverse order so items can be efficiently popped
805 805             # from a stack of chunks.
806 806 chunks.reverse()
807 807
808 808 while chunks:
809 809 # Start the list of chunks that will make up the current line.
810 810 # cur_len is just the length of all the chunks in cur_line.
811 811 cur_line = []
812 812 cur_len = 0
813 813
814 814 # Figure out which static string will prefix this line.
815 815 if lines:
816 816 indent = self.subsequent_indent
817 817 else:
818 818 indent = self.initial_indent
819 819
820 820 # Maximum width for this line.
821 821 width = self.width - len(indent)
822 822
823 823 # First chunk on line is whitespace -- drop it, unless this
824 824 # is the very beginning of the text (i.e. no lines started yet).
825 825 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
826 826 del chunks[-1]
827 827
828 828 while chunks:
829 829 l = colwidth(chunks[-1])
830 830
831 831 # Can at least squeeze this chunk onto the current line.
832 832 if cur_len + l <= width:
833 833 cur_line.append(chunks.pop())
834 834 cur_len += l
835 835
836 836 # Nope, this line is full.
837 837 else:
838 838 break
839 839
840 840 # The current line is full, and the next chunk is too big to
841 841 # fit on *any* line (not just this one).
842 842 if chunks and colwidth(chunks[-1]) > width:
843 843 self._handle_long_word(chunks, cur_line, cur_len, width)
844 844
845 845 # If the last chunk on this line is all whitespace, drop it.
846 846 if (
847 847 self.drop_whitespace
848 848 and cur_line
849 849 and cur_line[-1].strip() == r''
850 850 ):
851 851 del cur_line[-1]
852 852
853 853 # Convert current line back to a string and store it in list
854 854 # of all lines (return value).
855 855 if cur_line:
856 856 lines.append(indent + ''.join(cur_line))
857 857
858 858 return lines
859 859
860 860 global _MBTextWrapper
861 861 _MBTextWrapper = tw
862 862 return tw(**kwargs)
863 863
864 864
865 865 def wrap(
866 866 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
867 867 ) -> bytes:
868 868 maxindent = max(len(hangindent), len(initindent))
869 869 if width <= maxindent:
870 870 # adjust for weird terminal size
871 871 width = max(78, maxindent + 1)
872 872 line = line.decode(
873 873 pycompat.sysstr(encoding.encoding),
874 874 pycompat.sysstr(encoding.encodingmode),
875 875 )
876 876 initindent = initindent.decode(
877 877 pycompat.sysstr(encoding.encoding),
878 878 pycompat.sysstr(encoding.encodingmode),
879 879 )
880 880 hangindent = hangindent.decode(
881 881 pycompat.sysstr(encoding.encoding),
882 882 pycompat.sysstr(encoding.encodingmode),
883 883 )
884 884 wrapper = _MBTextWrapper(
885 885 width=width, initial_indent=initindent, subsequent_indent=hangindent
886 886 )
887 887 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
888 888
889 889
890 890 _booleans = {
891 891 b'1': True,
892 892 b'yes': True,
893 893 b'true': True,
894 894 b'on': True,
895 895 b'always': True,
896 896 b'0': False,
897 897 b'no': False,
898 898 b'false': False,
899 899 b'off': False,
900 900 b'never': False,
901 901 }
902 902
903 903
904 904 def parsebool(s: bytes) -> Optional[bool]:
905 905 """Parse s into a boolean.
906 906
907 907 If s is not a valid boolean, returns None.
908 908 """
909 909 return _booleans.get(s.lower(), None)
910 910
911 911
912 912 # TODO: make arg mandatory (and fix code below?)
913 913 def parselist(value: Optional[bytes]):
914 914 """parse a configuration value as a list of comma/space separated strings
915 915
916 916 >>> parselist(b'this,is "a small" ,test')
917 917 ['this', 'is', 'a small', 'test']
918 918 """
919 919
920 920 def _parse_plain(parts, s, offset):
921 921 whitespace = False
922 922 while offset < len(s) and (
923 923 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
924 924 ):
925 925 whitespace = True
926 926 offset += 1
927 927 if offset >= len(s):
928 928 return None, parts, offset
929 929 if whitespace:
930 930 parts.append(b'')
931 931 if s[offset : offset + 1] == b'"' and not parts[-1]:
932 932 return _parse_quote, parts, offset + 1
933 933 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
934 934 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
935 935 return _parse_plain, parts, offset + 1
936 936 parts[-1] += s[offset : offset + 1]
937 937 return _parse_plain, parts, offset + 1
938 938
939 939 def _parse_quote(parts, s, offset):
940 940 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
941 941 parts.append(b'')
942 942 offset += 1
943 943 while offset < len(s) and (
944 944 s[offset : offset + 1].isspace()
945 945 or s[offset : offset + 1] == b','
946 946 ):
947 947 offset += 1
948 948 return _parse_plain, parts, offset
949 949
950 950 while offset < len(s) and s[offset : offset + 1] != b'"':
951 951 if (
952 952 s[offset : offset + 1] == b'\\'
953 953 and offset + 1 < len(s)
954 954 and s[offset + 1 : offset + 2] == b'"'
955 955 ):
956 956 offset += 1
957 957 parts[-1] += b'"'
958 958 else:
959 959 parts[-1] += s[offset : offset + 1]
960 960 offset += 1
961 961
962 962 if offset >= len(s):
963 963 real_parts = _configlist(parts[-1])
964 964 if not real_parts:
965 965 parts[-1] = b'"'
966 966 else:
967 967 real_parts[0] = b'"' + real_parts[0]
968 968 parts = parts[:-1]
969 969 parts.extend(real_parts)
970 970 return None, parts, offset
971 971
972 972 offset += 1
973 973 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
974 974 offset += 1
975 975
976 976 if offset < len(s):
977 977 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
978 978 parts[-1] += b'"'
979 979 offset += 1
980 980 else:
981 981 parts.append(b'')
982 982 else:
983 983 return None, parts, offset
984 984
985 985 return _parse_plain, parts, offset
986 986
987 987 def _configlist(s):
988 988 s = s.rstrip(b' ,')
989 989 if not s:
990 990 return []
991 991 parser, parts, offset = _parse_plain, [b''], 0
992 992 while parser:
993 993 parser, parts, offset = parser(parts, s, offset)
994 994 return parts
995 995
996 996 if value is not None and isinstance(value, bytes):
997 997 result = _configlist(value.lstrip(b' ,\n'))
998 998 else:
999 999 result = value
1000 1000 return result or []
1001 1001
1002 1002
1003 1003 def evalpythonliteral(s: bytes):
1004 1004 """Evaluate a string containing a Python literal expression"""
1005 1005 # We could backport our tokenizer hack to rewrite '' to u'' if we want
1006 1006 return ast.literal_eval(s.decode('latin1'))
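For context, the bytearray branch in pprintgen() (lines 92-95 above) coerces to bytes before escaping precisely because escapestr() could not previously take a bytearray. A minimal sketch of how pprint() renders such values, under the same assumption of an importable Mercurial checkout:

# Sketch only; assumes mercurial.utils.stringutil is importable.
from mercurial.utils import stringutil

# bytearray takes the dedicated branch and is coerced to bytes before
# escaping, so the rendering is wrapped in bytearray['...'].
print(stringutil.pprint(bytearray(b'a\0b')))         # b"bytearray['a\\x00b']"

# Containers are emitted with sorted dict keys and, when indent > 0,
# across multiple lines.
print(stringutil.pprint({b'key': [b'v1', b'v2']}, indent=2).decode('ascii'))
# {
#   'key': [
#     'v1',
#     'v2'
#   ]
# }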