##// END OF EJS Templates
stringutil: teach pprint() to recognize generators...
Gregory Szorc -
r39332:ce145f8e default
parent child Browse files
Show More
@@ -1,588 +1,591 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 import types
16 17
17 18 from ..i18n import _
18 19 from ..thirdparty import attr
19 20
20 21 from .. import (
21 22 encoding,
22 23 error,
23 24 pycompat,
24 25 )
25 26
# Characters that are special inside a regular expression.  The list was
# pulled from https://bugs.python.org/issue29995, which was part of
# Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table mapping each special code point to its escaped form.
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}

def reescape(pat):
    """Drop-in replacement for re.escape.

    Bytes input yields bytes, anything else is translated and returned
    as-is.
    """
    # NOTE: the escaping is intentionally done on unicode rather than
    # bytes, because it is only possible with unicode.translate, not
    # bytes.translate. Bytes therefore take a round trip through latin1.
    if isinstance(pat, bytes):
        escaped = pat.decode('latin1').translate(_regexescapemap)
        return escaped.encode('latin1')
    return pat.translate(_regexescapemap)
44 45
def pprint(o, bprefix=False):
    """Pretty print an object.

    bytes, bytearray, list, dict, set, tuple and generator values are
    formatted recursively; everything else is handed to
    pycompat.byterepr(). ``bprefix`` controls whether bytes values are
    rendered with a leading ``b``.

    Note that a generator argument is iterated (and so exhausted) in
    order to print its items.
    """
    def joinitems(items):
        # Format every item with the same bprefix setting and join them.
        return b', '.join(pprint(item, bprefix=bprefix) for item in items)

    if isinstance(o, bytes):
        if bprefix:
            return "b'%s'" % escapestr(o)
        return "'%s'" % escapestr(o)
    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))
    if isinstance(o, list):
        return '[%s]' % joinitems(o)
    if isinstance(o, dict):
        # Entries are emitted in sorted order of their (key, value) pairs.
        pairs = (
            '%s: %s' % (pprint(key, bprefix=bprefix),
                        pprint(value, bprefix=bprefix))
            for key, value in sorted(o.items()))
        return '{%s}' % (b', '.join(pairs))
    if isinstance(o, set):
        return 'set([%s])' % joinitems(sorted(o))
    if isinstance(o, tuple):
        return '(%s)' % joinitems(o)
    if isinstance(o, types.GeneratorType):
        return 'gen[%s]' % joinitems(o)
    return pycompat.byterepr(o)
69 72
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The repr of ``o`` is cut in front of every '<' (or in front of the
    ' field=' that introduces it) and each piece is put on its own line,
    indented by the number of '<' still unclosed at that point.
    """
    rs = pycompat.byterepr(o)
    end = len(rs)
    pieces = []
    p0 = p1 = 0
    while p0 < end:
        # '... field=<type ... field=<type ...'
        #      p0 and p1 mark the current piece, q0 and q1 the next one
        q1 = rs.find('<', p1 + 1)
        q0 = -1
        if q1 < 0:
            # no further '<': the current piece runs to the end
            q1 = end
        elif q1 > p1 + 1 and rs.startswith('=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        depth = rs.count('<', 0, p0) - rs.count('>', 0, p0)
        assert depth >= 0
        pieces.append((depth, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    # NOTE(review): the indent unit below is reproduced exactly as it
    # appears in the reviewed listing, which collapses runs of whitespace;
    # confirm its width against the repository copy.
    return '\n'.join(' ' * depth + piece for depth, piece in pieces)
95 98
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty string.
    """
    if r is None:
        return ''
    if isinstance(r, tuple):
        # First item is the format string, the rest are its arguments.
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
118 121
def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character."""
    if not s:
        return False
    return '\0' in s
122 125
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    The matcher function of a 're' pattern is the compiled regex's search
    method; the one of a 'literal' pattern returns a plain bool. An invalid
    regular expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        # Compare with ``==`` instead of handing out the bound ``__eq__``:
        # the bound method returns NotImplemented (which is not False)
        # when given a value of another type, so such a value would be
        # reported as a match.
        match = lambda s: pattern == s
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
181 184
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' on is dropped, then everything up to and
    including the first '<', and finally the result is cut at the first
    space and at the first '.'.
    """
    user = user.partition('@')[0]
    _head, bracket, tail = user.partition('<')
    if bracket:
        user = tail
    user = user.partition(' ')[0]
    return user.partition('.')[0]
197 200
def emailuser(user):
    """Return the user portion of an email address.

    That is the text in front of the first '@', minus anything up to and
    including a '<' in that text.
    """
    local = user.partition('@')[0]
    _head, bracket, tail = local.partition('<')
    return tail if bracket else local
207 210
def email(author):
    '''Return the email part of an author field.

    This is the text after the first '<' (or from the start when there is
    none) up to the first '>' (or to the end when there is none).
    '''
    start = author.find('<') + 1
    stop = author.find('>')
    if stop == -1:
        return author[start:]
    return author[start:stop]
214 217
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    An author without any '@' is returned unchanged. When there is no
    '<' either, the text before the '@' is used, with dots turned into
    spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    bracket = author.find('<')
    if bracket == -1:
        # Bare address: use the part before the '@'.
        at = author.find('@')
        return author[:at].replace('.', ' ')
    # Drop surrounding blanks and quotes, then unescape embedded quotes.
    name = author[:bracket].strip(' "')
    return name.replace('\\"', '"')
241 244
@attr.s(hash=True)
class mailmapping(object):
    '''One side (key or value) of a mailmap entry: an email address
    together with an optional user name'''
    # declared without a default
    email = attr.ib()
    # defaults to None when no name is given
    name = attr.ib(default=None)
248 251
249 252 def _ismailmaplineinvalid(names, emails):
250 253 '''Returns True if the parsed names and emails
251 254 in a mailmap entry are invalid.
252 255
253 256 >>> # No names or emails fails
254 257 >>> names, emails = [], []
255 258 >>> _ismailmaplineinvalid(names, emails)
256 259 True
257 260 >>> # Only one email fails
258 261 >>> emails = [b'email@email.com']
259 262 >>> _ismailmaplineinvalid(names, emails)
260 263 True
261 264 >>> # One email and one name passes
262 265 >>> names = [b'Test Name']
263 266 >>> _ismailmaplineinvalid(names, emails)
264 267 False
265 268 >>> # No names but two emails passes
266 269 >>> names = []
267 270 >>> emails = [b'proper@email.com', b'commit@email.com']
268 271 >>> _ismailmaplineinvalid(names, emails)
269 272 False
270 273 '''
271 274 return not emails or not names and len(emails) < 2
272 275
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict whose keys are the mailmapping to look up (the commit
    side of a line) and whose values are the mailmapping to use instead
    (the proper side). ``None`` content gives an empty dict; comment lines
    and lines that do not form a valid entry are skipped.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Whole-line comments carry no entry
        if line.lstrip().startswith('#'):
            continue

        # names and emails collect what has been parsed from this line;
        # pending collects the words of the name currently being read
        names = []
        emails = []
        pending = []

        for word in line.split():
            if word.startswith('#'):
                # A trailing comment ends the entry
                break

            if not (word.startswith('<') and word.endswith('>')):
                # Just another word of a name
                pending.append(word)
                continue

            # An email: keep it without its "<>" and close the name that
            # was being collected, if any
            emails.append(word[1:-1])
            if pending:
                names.append(' '.join(pending))
                pending = []

            # Nothing after a second email fits the .mailmap format
            if len(emails) > 1:
                break

        # A usable entry has at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The commit side is the last email, with a name only when the
        # line carries two names
        key = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )
        # The proper side is the first email and the first name, if any
        mailmap[key] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap
354 357
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup is tried with both name and email first, then with the
    email alone. Whatever the matching entry leaves unset is taken from
    the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # A malformed author field or an empty mailmap leaves nothing to do
    if not (isauthorwellformed(author) and mailmap):
        return author

    # The author field expressed as a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Most specific key first: commit name and email together
        proper = mailmap[commit]
    except KeyError:
        # Fall back to the email alone; a separate object is used so that
        # the original commit fields stay available below
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Fill in the proper values, keeping the original ones where the
    # entry has nothing to offer
    name = proper.name or commit.name
    address = proper.email or commit.email
    return '%s <%s>' % (name, address)
401 404
# Some text without '<', one whitespace character, then a bracketed
# address that contains an '@'.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Tell whether the author field has the expected form
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    found = _correctauthorformat.match(author)
    return found is not None
424 427
def ellipsis(text, maxlength=400):
    """Trim ``text`` to at most ``maxlength`` display columns (400 unless
    told otherwise), using '...' as the ellipsis passed to encoding.trim().
    """
    return encoding.trim(text, maxlength, ellipsis='...')
428 431
def escapestr(s):
    """Return ``s`` with backslash escapes applied, as bytes.

    A memoryview is converted to bytes first.
    """
    if isinstance(s, memoryview):
        s = bytes(s)
    # Use the function underlying s.encode('string_escape') directly, for
    # Python 3 compatibility. It returns an (output, length) pair.
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
435 438
def unescapestr(s):
    """Return ``s`` with its backslash escapes decoded."""
    # codecs.escape_decode() returns an (output, length) pair.
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
438 441
def forcebytestr(obj):
    """Portably turn an arbitrary object (e.g. an exception) into a byte
    string."""
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string: go through the local encoding, may be lossy
        converted = pycompat.bytestr(encoding.strtolocal(str(obj)))
    return converted
447 450
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
451 454
# The wrapper class is only built on first use; afterwards this name is
# rebound to the class itself.
def _MBTextWrapper(**kwargs):
    global _MBTextWrapper

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr in front of the first character that makes the
            # accumulated column width exceed space_left.
            used = 0
            colwidth = encoding.ucolwidth
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # Put as much of the word as fits on this line and leave
                # the remainder on the stack.
                head, rest = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = rest
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Reversed, so that the next chunk can be popped cheaply from
            # the end of the list.
            chunks.reverse()

            while chunks:
                # Chunks making up the current line, and their total width.
                cur_line = []
                cur_len = 0

                # Only the very first line gets the initial indent.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Room left for text on this line.
                width = self.width - len(indent)

                # Drop whitespace at the start of a line, except at the
                # very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Stop as soon as the next chunk no longer fits.
                    if cur_len + chunkwidth > width:
                        break

                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The next chunk is too big to fit on *any* line (not just
                # this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # Drop whitespace at the end of the line.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Store the finished line.
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    _MBTextWrapper = tw
    return tw(**kwargs)
555 558
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` with the width-aware text wrapper.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three are decoded with the configured encoding
    before wrapping, and the result is encoded again.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)
    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errmode)
    initindent = initindent.decode(codec, errmode)
    hangindent = hangindent.decode(codec, errmode)
    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    return wrapper.fill(line).encode(codec)
571 574
# Spellings accepted by parsebool(), in lower case.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
582 585
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3 the input is decoded as latin1 before being evaluated.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now