##// END OF EJS Templates
stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara -
r46314:d502caab default
parent child Browse files
Show More
@@ -1,812 +1,820
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16 import types
17 17
18 18 from ..i18n import _
19 19 from ..thirdparty import attr
20 20
21 21 from .. import (
22 22 encoding,
23 23 error,
24 24 pycompat,
25 25 )
26 26
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# ord(char) -> backslash-escaped text, the table shape unicode.translate()
# expects
_regexescapemap = {ord(c): (b'\\' + c).decode('latin1') for c in _respecial}
# each element of _respecial -> the same element prefixed with a backslash
regexbytesescapemap = {c: (b'\\' + c) for c in _respecial}


def reescape(pat):
    """Drop-in replacement for re.escape.

    Accepts either bytes or unicode and returns the same type it was given.
    """
    # NOTE: the escaping itself is always done on unicode, since a
    # per-character mapping table only works with unicode.translate and
    # not with bytes.translate. Bytes are round-tripped through latin1,
    # which maps every byte value to exactly one code point.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    return pat.decode('latin1').translate(_regexescapemap).encode('latin1')
47 47
48 48
def pprint(o, bprefix=False, indent=0, level=0):
    """Pretty print an object into a single byte string.

    The arguments are passed through unchanged to ``pprintgen()``; the atoms
    it produces are concatenated.
    """
    atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
    return b''.join(atoms)
52 52
53 53
def _pprintcollection(members, empty, opener, closer, emit, indent, level):
    """Yield the atoms for one bracketed collection.

    ``members`` is an iterator over the elements to print. ``empty`` is the
    single atom emitted when ``members`` yields nothing; otherwise the
    elements are wrapped in the ``opener`` and ``closer`` atoms.
    ``emit(member, level)`` must return an iterable of atoms for one
    element. ``indent`` and ``level`` have the same meaning as in
    ``pprintgen()``.
    """
    try:
        upcoming = next(members)
    except StopIteration:
        yield empty
        return

    yield opener

    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    last = False
    while not last:
        current = upcoming

        # Look one element ahead so we know whether a separator is needed
        # without calling len(), which generators do not support.
        try:
            upcoming = next(members)
        except StopIteration:
            last = True

        for chunk in emit(current, level):
            yield chunk

        if not last:
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)

    yield closer


def pprintgen(o, bprefix=False, indent=0, level=0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are printed recursively (dict
    items and set members in sorted order); any other object falls back to
    ``pycompat.byterepr()``.
    """

    def atoms(value, lvl):
        # atoms for a nested value, keeping the caller's formatting options
        return pprintgen(value, bprefix=bprefix, indent=indent, level=lvl)

    def pairatoms(pair, lvl):
        # atoms for one dict item: "<key>: <value>"
        key, value = pair
        for chunk in atoms(key, lvl):
            yield chunk
        yield b': '
        for chunk in atoms(value, lvl):
            yield chunk

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
        return

    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
        return

    # (members, atom when empty, opener, closer, per-member emitter)
    if isinstance(o, list):
        spec = (iter(o), b'[]', b'[', b']', atoms)
    elif isinstance(o, dict):
        spec = (iter(sorted(o.items())), b'{}', b'{', b'}', pairatoms)
    elif isinstance(o, set):
        spec = (iter(sorted(o)), b'set([])', b'set([', b'])', atoms)
    elif isinstance(o, tuple):
        spec = (iter(o), b'()', b'(', b')', atoms)
    elif isinstance(o, types.GeneratorType):
        spec = (o, b'gen[]', b'gen[', b']', atoms)
    else:
        yield pycompat.byterepr(o)
        return

    members, empty, opener, closer, emit = spec
    for chunk in _pprintcollection(
        members, empty, opener, closer, emit, indent, level
    ):
        yield chunk
252 252
253 253
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is split into lines in front of each '<' (or in
    front of the ' field=' that introduces it), and each line is indented
    by the number of '<' still unclosed at the point where it starts.
    """
    rs = pycompat.byterepr(o)
    end = len(rs)
    rows = []
    start = scanned = 0
    while start < end:
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      start  scanned  cut   bracket
        cut = -1
        bracket = rs.find(b'<', scanned + 1)
        if bracket < 0:
            bracket = end
        elif bracket > scanned + 1 and rs.startswith(b'=', bracket - 1):
            # backtrack for ' field=<'
            cut = rs.rfind(b' ', scanned + 1, bracket - 1)
        # no usable space found: cut right at the bracket; otherwise cut
        # just after the space
        cut = bracket if cut < 0 else cut + 1
        depth = rs.count(b'<', 0, start) - rs.count(b'>', 0, start)
        assert depth >= 0
        # NOTE(review): the indent unit is copied from the scraped source,
        # where runs of spaces look collapsed -- confirm against the file.
        rows.append(b' ' * depth + rs[start:cut].rstrip())
        start, scanned = cut, bracket
    return b'\n'.join(rows)
279 279
280 280
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty byte string.
    """
    if r is None:
        return b''
    if isinstance(r, tuple):
        # first element is the format, the rest are its arguments
        template = r[0]
        args = pycompat.rapply(pycompat.maybebytestr, r[1:])
        return template % args
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
303 303
304 304
def binary(s):
    """return true if a string is binary data

    "Binary" means non-empty and containing at least one NUL byte.
    """
    if not s:
        return False
    return b'\0' in s
308 308
309 309
310 def _splitpattern(pattern):
311 if pattern.startswith(b're:'):
312 return b're', pattern[3:]
313 elif pattern.startswith(b'literal:'):
314 return b'literal', pattern[8:]
315 return b'literal', pattern
316
317
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    an invalid regular expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # The exception must be converted explicitly: %-formatting a
            # bytes template with an exception object raises TypeError on
            # Python 3, which would hide the intended ParseError.
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            match = pattern.__eq__
        else:
            # lower-case the pattern once, not on every comparison
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    # every kind returned by _splitpattern() must be handled above
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
367 375
368 376
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' on is dropped, then everything up to and
    including the first '<', then everything from the first space on, and
    finally everything from the first '.' on.
    """
    user = user.split(b'@', 1)[0]
    head, bracket, tail = user.partition(b'<')
    if bracket:
        user = tail
    user = user.split(b' ', 1)[0]
    return user.split(b'.', 1)[0]
384 392
385 393
def emailuser(user):
    """Return the user portion of an email address.

    That is the text before the first '@', with anything up to and
    including a '<' removed from its front.
    """
    local = user.split(b'@', 1)[0]
    head, bracket, tail = local.partition(b'<')
    return tail if bracket else local
395 403
396 404
def email(author):
    """get email of author.

    Returns the text between the first '<' and the first '>'. A missing '<'
    means "start of string" and a missing '>' means "end of string".
    """
    # find() yields -1 when there is no '<', so this becomes offset 0
    start = author.find(b'<') + 1
    stop = author.find(b'>')
    if stop < 0:
        return author[start:]
    return author[start:stop]
403 411
404 412
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # no email address at all: the whole field is the name
    if b'@' not in author:
        return author
    name, bracket, rest = author.partition(b'<')
    if bracket:
        # drop surrounding blanks/quotes, then unescape embedded quotes
        return name.strip(b' "').replace(b'\\"', b'"')
    # bare address: use the local part, with dots turned into spaces
    return author[: author.find(b'@')].replace(b'.', b' ')
431 439
432 440
@attr.s(hash=True)
class mailmapping(object):
    """Represents a username/email key or value in
    a mailmap file

    Declared with ``hash=True`` so that instances can serve as dictionary
    keys.
    """

    # email address; has no default, so it must always be supplied
    email = attr.ib()
    # user name; None when not supplied
    name = attr.ib(default=None)
440 448
441 449
442 450 def _ismailmaplineinvalid(names, emails):
443 451 '''Returns True if the parsed names and emails
444 452 in a mailmap entry are invalid.
445 453
446 454 >>> # No names or emails fails
447 455 >>> names, emails = [], []
448 456 >>> _ismailmaplineinvalid(names, emails)
449 457 True
450 458 >>> # Only one email fails
451 459 >>> emails = [b'email@email.com']
452 460 >>> _ismailmaplineinvalid(names, emails)
453 461 True
454 462 >>> # One email and one name passes
455 463 >>> names = [b'Test Name']
456 464 >>> _ismailmaplineinvalid(names, emails)
457 465 False
458 466 >>> # No names but two emails passes
459 467 >>> names = []
460 468 >>> emails = [b'proper@email.com', b'commit@email.com']
461 469 >>> _ismailmaplineinvalid(names, emails)
462 470 False
463 471 '''
464 472 return not emails or not names and len(emails) < 2
465 473
466 474
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit-side ``mailmapping`` of each valid
    line to its proper-side ``mailmapping``. ``None`` input gives an empty
    dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Whole-line comments carry no mapping
        if line.lstrip().startswith(b'#'):
            continue

        # names/emails collect what was parsed from this line; words
        # accumulates the pieces of the name currently being read
        names = []
        emails = []
        words = []

        for token in line.split():
            if token.startswith(b'#'):
                # A trailing comment ends the useful part of the line
                break

            if not (token.startswith(b'<') and token.endswith(b'>')):
                # Another word of the committer's name
                words.append(token)
                continue

            # An email: record it without its "<>" and finalize the name
            # that was being read before it, if any
            emails.append(token[1:-1])
            if words:
                names.append(b' '.join(words))
                words = []

            # Nothing after a second email fits the .mailmap spec
            if len(emails) > 1:
                break

        # We require at least one email, and either at least one name or
        # a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The commit side only has a name when the line had two names
        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None

        key = mailmapping(email=emails[-1], name=commitname)
        mailmap[key] = mailmapping(email=emails[0], name=propername)

    return mailmap
546 554
547 555
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # A malformed author field, or an empty mailmap, leaves nothing to map
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    if commit in mailmap:
        # Exact hit on both the commit name and the commit email
        proper = mailmap[commit]
    else:
        # Fall back to an email-only key; when that is unknown too, an
        # all-None mapping makes both fields default to the commit's own
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Return the author field with proper values filled in
    return b'%s <%s>' % (
        proper.name or commit.name,
        proper.email or commit.email,
    )
594 602
595 603
# "<name> <local@domain>": a name without '<', one whitespace character,
# then an angle-bracketed address containing an '@'
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author):
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    # a match object is always truthy, a failed match is None
    return bool(_correctauthorformat.match(author))
619 627
620 628
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to ``encoding.trim()`` with '...' as the ellipsis
    marker.
    """
    marker = b'...'
    return encoding.trim(text, maxlength, ellipsis=marker)
624 632
625 633
def escapestr(s):
    """Return ``s`` with special bytes replaced by backslash escapes.

    ``s`` may be bytes or a memoryview; a memoryview is copied to bytes
    first.
    """
    raw = bytes(s) if isinstance(s, memoryview) else s
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, _consumed = codecs.escape_encode(raw)
    return escaped
632 640
633 641
def unescapestr(s):
    """Decode the backslash escapes in ``s`` via codecs.escape_decode()."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
636 644
637 645
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        text = str(obj)
        converted = pycompat.bytestr(encoding.strtolocal(text))
    return converted
646 654
647 655
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
651 659
652 660
# delay definition of the wrapper class until it is first needed
def _MBTextWrapper(**kwargs):
    """Create a display-width-aware TextWrapper.

    The wrapper class is defined on the first call only: the module-level
    name ``_MBTextWrapper`` is then rebound to the class itself, so later
    calls instantiate it directly. ``kwargs`` are passed to the
    constructor.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` into the part that fits into ``space_left``
            display columns and the remainder."""
            l = 0
            colwidth = encoding.ucolwidth
            for i in pycompat.xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits. The remainder must be an empty string of
            # the same type as the input: a bytes b'' here would later be
            # mixed with unicode chunks and break ''.join() on Python 3.
            return ucstr, ucstr[:0]

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always make room for at least one column so that progress
            # is guaranteed
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError(b"invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
760 768
761 769
def wrap(line, width, initindent=b'', hangindent=b''):
    """Wrap ``line`` to ``width`` display columns and return bytes.

    ``initindent`` prefixes the first output line and ``hangindent`` every
    following one. All three byte strings are decoded using
    ``encoding.encoding``/``encoding.encodingmode`` before wrapping, and
    the result is encoded back with ``encoding.encoding``.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)

    utext = line.decode(codec, errors)
    ufirst = initindent.decode(codec, errors)
    urest = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=ufirst, subsequent_indent=urest
    )
    return wrapper.fill(utext).encode(codec)
783 791
784 792
785 793 _booleans = {
786 794 b'1': True,
787 795 b'yes': True,
788 796 b'true': True,
789 797 b'on': True,
790 798 b'always': True,
791 799 b'0': False,
792 800 b'no': False,
793 801 b'false': False,
794 802 b'off': False,
795 803 b'never': False,
796 804 }
797 805
798 806
799 807 def parsebool(s):
800 808 """Parse s into a boolean.
801 809
802 810 If s is not a valid boolean, returns None.
803 811 """
804 812 return _booleans.get(s.lower(), None)
805 813
806 814
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3 the input is decoded as latin1 before being handed to
    ``ast.literal_eval()``; on Python 2 it is passed through as is.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now