##// END OF EJS Templates
stringutil: remove Python 2 support code...
Gregory Szorc -
r49766:46b3ecfb default
parent child Browse files
Show More
@@ -1,969 +1,967 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10
11 11 import ast
12 12 import codecs
13 13 import re as remod
14 14 import textwrap
15 15 import types
16 16
17 17 from ..i18n import _
18 18 from ..thirdparty import attr
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 )
25 25
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Maps the ordinal of each special character to its backslash-escaped text
# form; keyed by ordinal so it can be handed to str.translate().
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
# Maps each special character (as a length-1 bytes value) to its
# backslash-escaped bytes form.
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
31 31
32 32
def reescape(pat):
    """Drop-in replacement for re.escape.

    ``pat`` may be ``bytes`` or ``str``; the result has the same type as
    the input. Every character listed in ``_regexescapemap`` is prefixed
    with a backslash.
    """
    # NOTE: it is intentional that this works on unicodes and not
    # bytes, as it's only possible to do the escaping with
    # unicode.translate, not bytes.translate. Sigh.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    # latin1 maps every byte to the code point of the same value, so the
    # round trip through str is lossless.
    return pat.decode('latin1').translate(_regexescapemap).encode('latin1')
46 46
47 47
def pprint(o, bprefix=False, indent=0, level=0):
    """Pretty print an object.

    Joins the atoms produced by ``pprintgen()`` (called with the same
    arguments) into a single bytes value.
    """
    atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
    return b''.join(atoms)
51 51
52 52
def _pprintnewline(indent, level):
    """Yield the line break and padding that start a line at ``level``.

    Yields nothing when ``indent`` is 0 (single-line output).
    """
    if indent:
        yield b'\n'
        yield b' ' * (level * indent)


def _pprintseparator(indent, level):
    """Yield the atoms separating two sibling items of a collection."""
    if indent:
        yield b',\n'
        yield b' ' * (level * indent)
    else:
        yield b', '


def pprintgen(o, bprefix=False, indent=0, level=0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are rendered recursively
    (dict items and set members in sorted order); any other object falls
    back to ``pycompat.byterepr()``.
    """
    # Children are rendered one level deeper, but only when indenting.
    inner = level + 1 if indent else level

    def render(value):
        return pprintgen(value, bprefix=bprefix, indent=indent, level=inner)

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        if not o:
            yield b'[]'
            return

        yield b'['
        yield from _pprintnewline(indent, inner)

        for i, a in enumerate(o):
            yield from render(a)
            if i + 1 < len(o):
                yield from _pprintseparator(indent, inner)

        yield from _pprintnewline(indent, level)
        yield b']'
    elif isinstance(o, dict):
        if not o:
            yield b'{}'
            return

        yield b'{'
        yield from _pprintnewline(indent, inner)

        for i, (k, v) in enumerate(sorted(o.items())):
            yield from render(k)
            yield b': '
            yield from render(v)
            if i + 1 < len(o):
                yield from _pprintseparator(indent, inner)

        yield from _pprintnewline(indent, level)
        yield b'}'
    elif isinstance(o, set):
        if not o:
            yield b'set([])'
            return

        yield b'set(['
        yield from _pprintnewline(indent, inner)

        for i, k in enumerate(sorted(o)):
            yield from render(k)
            if i + 1 < len(o):
                yield from _pprintseparator(indent, inner)

        yield from _pprintnewline(indent, level)
        yield b'])'
    elif isinstance(o, tuple):
        if not o:
            yield b'()'
            return

        yield b'('
        yield from _pprintnewline(indent, inner)

        for i, a in enumerate(o):
            yield from render(a)
            if i + 1 < len(o):
                yield from _pprintseparator(indent, inner)

        yield from _pprintnewline(indent, level)
        yield b')'
    elif isinstance(o, types.GeneratorType):
        # Special case of empty generator.
        try:
            nextitem = next(o)
        except StopIteration:
            yield b'gen[]'
            return

        yield b'gen['
        yield from _pprintnewline(indent, inner)

        # A generator has no length, so look one item ahead to know whether
        # a separator must follow the current item.
        last = False
        while not last:
            current = nextitem

            try:
                nextitem = next(o)
            except StopIteration:
                last = True

            yield from render(current)

            if not last:
                yield from _pprintseparator(indent, inner)

        yield from _pprintnewline(indent, level)
        yield b']'
    else:
        yield pycompat.byterepr(o)
251 251
252 252
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is split in front of each ``<`` (or in front of
    the ``field=`` word directly preceding it), and each piece is placed on
    its own line, indented by the number of ``<...>`` groups still open at
    that point.
    """
    lines = []
    rs = pycompat.byterepr(o)
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        q1 = rs.find(b'<', p1 + 1)
        if q1 < 0:
            q1 = len(rs)
            # pytype: disable=wrong-arg-count
            # TODO: figure out why pytype doesn't recognize the optional start
            # arg
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
            # pytype: enable=wrong-arg-count
            # backtrack for ' field=<'
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = '<' seen so far minus '>' seen so far
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    # NOTE(review): the copy this was restored from had runs of whitespace
    # collapsed; confirm the width of the indent unit below against upstream.
    return b'\n'.join(b' ' * l + s for l, s in lines)
282 282
283 283
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ======== =================================
    type(r)  example
    ======== =================================
    tuple    ('<not %r>', other)
    bytes    '<branch closed>'
    callable lambda: '<branch %r>' % sorted(b)
    object   other
    ======== =================================

    ``None`` yields an empty bytes value. A tuple is treated as a format
    string followed by its arguments; a callable is invoked with no
    arguments; anything else is passed to ``pprint()``.
    """
    if r is None:
        return b''
    if isinstance(r, tuple):
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
306 306
307 307
def binary(s):
    """return true if a string is binary data

    A value counts as binary when it is non-empty and contains a NUL
    byte. Falsy input (``None``, ``b''``) returns False.
    """
    if not s:
        return False
    return b'\0' in s
311 311
312 312
313 313 def _splitpattern(pattern):
314 314 if pattern.startswith(b're:'):
315 315 return b're', pattern[3:]
316 316 elif pattern.startswith(b'literal:'):
317 317 return b'literal', pattern[8:]
318 318 return b'literal', pattern
319 319
320 320
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    raises error.ParseError if a 're:' pattern is not a valid regular
    expression.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            match = pattern.__eq__
        else:
            # lower-case the pattern once; each candidate is lower-cased
            # when it is tested
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
380 380
381 381
def substringregexp(pattern, flags=0):
    """Build a regexp object from a string pattern possibly starting with
    're:' or 'literal:' prefix.

    A 're:' pattern is compiled as is; a literal pattern is escaped first,
    so searching with the result performs a substring match. ``flags`` is
    passed through to the regexp compiler. Raises error.ParseError if a
    're:' pattern is not a valid regular expression.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     regexp = substringregexp(pattern)
    ...     return [bool(regexp.search(t)) for t in tests]
    >>> def itest(pattern, *tests):
    ...     regexp = substringregexp(pattern, remod.I)
    ...     return [bool(regexp.search(t)) for t in tests]

    substring matching (no prefix):
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
    [False, False, True]

    substring pattern should be escaped:
    >>> substringregexp(b'.bc').pattern
    '\\\\.bc'
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
    [False, False, False]

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    [False, False, True]

    force substring matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    [False, True]

    case insensitive literal matches
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
    [False, False, True]

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    [False, False, True]
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        try:
            return remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
    elif kind == b'literal':
        return remod.compile(remod.escape(pattern), flags)

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
432 432
433 433
def shortuser(user):
    """Return a short representation of a user name or email address.

    The steps are applied in order, each to the result of the previous
    one: drop everything from the first ``@``, drop everything up to and
    including the first ``<``, drop everything from the first space, drop
    everything from the first ``.``.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    f = user.find(b' ')
    if f >= 0:
        user = user[:f]
    f = user.find(b'.')
    if f >= 0:
        user = user[:f]
    return user
449 449
450 450
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first ``@`` onwards is dropped, then everything up
    to and including the first ``<`` of what remains.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    return user
460 460
461 461
def email(author):
    """get email of author.

    Returns the text between the first ``<`` and the first ``>``. When
    there is no ``<`` the result starts at the beginning of ``author``;
    when there is no ``>`` it runs to the end.
    """
    start = author.find(b'<') + 1  # 0 when '<' is absent (find() gives -1)
    end = author.find(b'>')
    if end == -1:
        end = None
    return author[start:end]
468 468
469 469
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    An author without ``@`` is returned unchanged. If there is a ``<``,
    the text before it is returned with surrounding spaces and double
    quotes stripped and ``\\\\"`` unescaped. Otherwise the part before the
    ``@`` is returned with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if b'@' not in author:
        return author
    f = author.find(b'<')
    if f != -1:
        return author[:f].strip(b' "').replace(b'\\"', b'"')
    f = author.find(b'@')
    return author[:f].replace(b'.', b' ')
496 496
497 497
@attr.s(hash=True)
class mailmapping:
    """Represents a username/email key or value in
    a mailmap file"""

    # e-mail address; the only required field
    email = attr.ib()
    # name attached to the address, or None when there is none
    name = attr.ib(default=None)
505 505
506 506
507 507 def _ismailmaplineinvalid(names, emails):
508 508 """Returns True if the parsed names and emails
509 509 in a mailmap entry are invalid.
510 510
511 511 >>> # No names or emails fails
512 512 >>> names, emails = [], []
513 513 >>> _ismailmaplineinvalid(names, emails)
514 514 True
515 515 >>> # Only one email fails
516 516 >>> emails = [b'email@email.com']
517 517 >>> _ismailmaplineinvalid(names, emails)
518 518 True
519 519 >>> # One email and one name passes
520 520 >>> names = [b'Test Name']
521 521 >>> _ismailmaplineinvalid(names, emails)
522 522 False
523 523 >>> # No names but two emails passes
524 524 >>> names = []
525 525 >>> emails = [b'proper@email.com', b'commit@email.com']
526 526 >>> _ismailmaplineinvalid(names, emails)
527 527 False
528 528 """
529 529 return not emails or not names and len(emails) < 2
530 530
531 531
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit identity of each entry (the last
    email on the line, plus the second name if the line has two) to the
    proper identity (the first email and, if present, the first name).
    Comment lines and lines rejected by ``_ismailmaplineinvalid()`` are
    skipped. ``None`` input yields an empty dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother parsing the line if it is a comment
        if line.lstrip().startswith(b'#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # namebuilder holds the words in a person's name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith(b'#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith(b'<') and element.endswith(b'>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(b' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The commit name only takes part in the key when the line carries
        # both a proper name and a commit name.
        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap
613 613
614 614
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup is tried with name and email first, then with the email
    alone. Fields that the matching entry does not provide are taken from
    ``author``.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
661 661
662 662
# "<name> <local@domain>": a name without '<', one whitespace character,
# then an address in angle brackets that contains an '@'.
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author):
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    return _correctauthorformat.match(author) is not None
686 686
687 687
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to ``encoding.trim()`` with ``...`` as the ellipsis marker.
    """
    return encoding.trim(text, maxlength, ellipsis=b'...')
691 691
692 692
def escapestr(s):
    """Return ``s`` with backslash escapes applied, as bytes.

    ``s`` may be ``bytes`` or a ``memoryview``; a memoryview is copied to
    bytes first.
    """
    if isinstance(s, memoryview):
        s = bytes(s)
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
699 699
700 700
def unescapestr(s):
    """Return ``s`` with backslash escape sequences decoded, as bytes.

    This is the inverse of ``escapestr()``.
    """
    return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
703 703
704 704
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    ``pycompat.bytestr(obj)`` is tried first. If that raises
    UnicodeEncodeError, ``str(obj)`` is converted with
    ``encoding.strtolocal()`` instead.
    """
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
713 713
714 714
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    # Avoid double backslash in Windows path repr()
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    return quoted.replace(b'\\\\', b'\\')
718 718
719 719
# The wrapper class is only built on first use: the first call defines it,
# rebinds the module-level name ``_MBTextWrapper`` to the class itself and
# returns an instance, so later calls instantiate the class directly.
def _MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` so that the head fits in ``space_left``
            columns; returns ``(head, remainder)``."""
            used = 0
            colwidth = encoding.ucolwidth
            for i, ch in enumerate(ucstr):
                used += colwidth(ch)
                if space_left < used:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits. The remainder must be text like ``ucstr``,
            # because it is put back on the chunk stack and later joined
            # with other text chunks.
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
827 827
828 828
def wrap(line, width, initindent=b'', hangindent=b''):
    """Wrap ``line`` to ``width`` columns and return the result as bytes.

    ``initindent`` prefixes the first output line and ``hangindent`` every
    following one. All three byte strings are decoded with the configured
    ``encoding.encoding`` / ``encoding.encodingmode`` before wrapping, and
    the wrapped text is encoded again with ``encoding.encoding``.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errors)
    initindent = initindent.decode(codec, errors)
    hangindent = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=initindent, subsequent_indent=hangindent
    )
    return wrapper.fill(line).encode(codec)
850 850
851 851
# Lower-case spellings recognised by parsebool().
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}


def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
872 872
873 873
def parselist(value):
    """parse a configuration value as a list of comma/space separated strings

    Double quotes group words containing separators into a single item.
    A value that is not ``bytes`` is returned unchanged, or as an empty
    list if it is falsy.

    >>> parselist(b'this,is "a small" ,test')
    ['this', 'is', 'a small', 'test']
    """
    # The parser is a small state machine: each state function consumes some
    # input and returns (next state function or None, parts, new offset).

    def _parse_plain(parts, s, offset):
        # State: outside of quotes.
        whitespace = False
        while offset < len(s) and (
            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
        ):
            whitespace = True
            offset += 1
        if offset >= len(s):
            return None, parts, offset
        if whitespace:
            # a run of separators starts a new item
            parts.append(b'')
        if s[offset : offset + 1] == b'"' and not parts[-1]:
            # a quote at the very start of an item opens a quoted section
            return _parse_quote, parts, offset + 1
        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
            # backslash-escaped quote: replace the backslash by the quote
            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
            return _parse_plain, parts, offset + 1
        parts[-1] += s[offset : offset + 1]
        return _parse_plain, parts, offset + 1

    def _parse_quote(parts, s, offset):
        # State: just after an opening quote.
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
            parts.append(b'')
            offset += 1
            while offset < len(s) and (
                s[offset : offset + 1].isspace()
                or s[offset : offset + 1] == b','
            ):
                offset += 1
            return _parse_plain, parts, offset

        while offset < len(s) and s[offset : offset + 1] != b'"':
            if (
                s[offset : offset + 1] == b'\\'
                and offset + 1 < len(s)
                and s[offset + 1 : offset + 2] == b'"'
            ):
                offset += 1
                parts[-1] += b'"'
            else:
                parts[-1] += s[offset : offset + 1]
            offset += 1

        if offset >= len(s):
            # Unterminated quote: re-parse what was collected as plain
            # text and give the opening quote back to the first item.
            real_parts = _configlist(parts[-1])
            if not real_parts:
                parts[-1] = b'"'
            else:
                real_parts[0] = b'"' + real_parts[0]
                parts = parts[:-1]
                parts.extend(real_parts)
            return None, parts, offset

        offset += 1
        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
            offset += 1

        if offset < len(s):
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
                parts[-1] += b'"'
                offset += 1
            else:
                parts.append(b'')
        else:
            return None, parts, offset

        return _parse_plain, parts, offset

    def _configlist(s):
        s = s.rstrip(b' ,')
        if not s:
            return []
        parser, parts, offset = _parse_plain, [b''], 0
        while parser:
            parser, parts, offset = parser(parts, s, offset)
        return parts

    if isinstance(value, bytes):
        result = _configlist(value.lstrip(b' ,\n'))
    else:
        result = value
    return result or []
962 962
963 963
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    ``s`` is a byte string; it is decoded as latin1 and handed to
    ``ast.literal_eval()``, whose result is returned.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    return ast.literal_eval(s.decode('latin1'))
General Comments 0
You need to be logged in to leave comments. Login now