##// END OF EJS Templates
stringutil: have buildrepr delegate to pprint for unknown types...
Augie Fackler -
r39087:38409be2 default
parent child Browse files
Show More
@@ -1,586 +1,586 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16
17 17 from ..i18n import _
18 18 from ..thirdparty import attr
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 )
25 25
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table keyed by code point: each special character maps to
# itself preceded by a backslash (as a unicode string). reescape() feeds
# this table to unicode.translate().
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30 30
def reescape(pat):
    """Drop-in replacement for re.escape.

    ``pat`` may be bytes or unicode; the result has the same type as the
    input.
    """
    # NOTE: the escaping is deliberately done on unicode, because it is only
    # possible with unicode.translate, not bytes.translate. Bytes input
    # therefore makes a round trip through latin1.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    escaped = pat.decode('latin1').translate(_regexescapemap)
    return escaped.encode('latin1')
44 44
def pprint(o, bprefix=False):
    """Pretty print an object.

    bytes, bytearray, list, dict, set and tuple values are formatted here
    (containers recursively, with dict items and set members in sorted
    order). Any other value is handed to pycompat.byterepr(). When
    ``bprefix`` is true, bytes values are rendered with a leading ``b``.
    """
    if isinstance(o, bytes):
        template = "b'%s'" if bprefix else "'%s'"
        return template % escapestr(o)

    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))

    if isinstance(o, list):
        items = (pprint(item, bprefix=bprefix) for item in o)
        return '[%s]' % (b', '.join(items))

    if isinstance(o, dict):
        pairs = (
            '%s: %s' % (pprint(key, bprefix=bprefix),
                        pprint(value, bprefix=bprefix))
            for key, value in sorted(o.items()))
        return '{%s}' % (b', '.join(pairs))

    if isinstance(o, set):
        members = (pprint(member, bprefix=bprefix) for member in sorted(o))
        return 'set([%s])' % (b', '.join(members))

    if isinstance(o, tuple):
        items = (pprint(item, bprefix=bprefix) for item in o)
        return '(%s)' % (b', '.join(items))

    return pycompat.byterepr(o)
69 69
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is cut into segments, each ending just before
    the next '<' (or before the ' field=' that introduces it). Every
    segment becomes one output line, prefixed once per level of '<...>'
    nesting that is still open where the segment starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    # p0: start of the segment being emitted; p1: position of the '<' that
    # opened it (both start at 0)
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        # q1: next '<' after the current one
        q1 = rs.find('<', p1 + 1)
        if q1 < 0:
            # no further '<': the last segment runs to the end of the repr
            q1 = len(rs)
        elif q1 > p1 + 1 and rs.startswith('=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(' ', p1 + 1, q1 - 1)
        if q0 < 0:
            # no ' field=' prefix found: the next segment starts at the '<'
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = '<' seen so far minus '>' seen so far
        l = rs.count('<', 0, p0) - rs.count('>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    return '\n'.join(' ' * l + s for l, s in lines)
95 95
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty string. A tuple is treated as a format string
    followed by its arguments, bytes are returned unchanged and a callable
    is invoked to produce the result. Any other object is delegated to
    pprint().
    """
    if r is None:
        return ''
    if isinstance(r, tuple):
        # r[0] is the format string; the remaining items are its arguments
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    # unknown types are rendered by pprint(), so nested containers and
    # bytes get the same formatting as everywhere else
    return pprint(r)
118 118
def binary(s):
    """return true if a string is binary data

    A value counts as binary when it is non-empty and contains a NUL
    character.
    """
    if not s:
        return False
    return '\0' in s
122 122
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    raises error.ParseError when a 're:' pattern is not a valid regular
    expression.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # Convert the exception explicitly: interpolating an exception
            # object into a byte string with %s fails on Python 3.
            raise error.ParseError(_('invalid regular expression: %s')
                                   % forcebytestr(e))
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    # anything without a recognized prefix is compared literally
    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
181 181
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' is dropped, then everything up to and
    including the first '<', and finally the result is cut at its first
    space and at its first dot.
    """
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    bracket = user.find('<')
    if bracket >= 0:
        user = user[bracket + 1:]
    # keep only the leading word, then only its part before the first dot
    for separator in (' ', '.'):
        cut = user.find(separator)
        if cut >= 0:
            user = user[:cut]
    return user
197 197
def emailuser(user):
    """Return the user portion of an email address.

    That is the text before the first '@', minus anything up to and
    including a '<' that precedes it.
    """
    at = user.find('@')
    if at != -1:
        user = user[:at]
    bracket = user.find('<')
    if bracket != -1:
        user = user[bracket + 1:]
    return user
207 207
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'. A missing
    '<' means "from the start", a missing '>' means "to the end".'''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
214 214
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    Without an '@' the input is returned unchanged. With a '<', the text
    before it is returned, stripped of surrounding spaces and double quotes
    and with escaped quotes unescaped. Otherwise the part before the '@' is
    returned with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author

    bracket = author.find('<')
    if bracket == -1:
        # bare address: use the part before '@', reading dots as spaces
        local = author[:author.find('@')]
        return local.replace('.', ' ')

    name = author[:bracket]
    return name.strip(' "').replace('\\"', '"')
241 241
# hash=True is requested so that instances can serve as dictionary keys;
# parsemailmap() stores them as both keys and values.
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''
    # email address; the only required field
    email = attr.ib()
    # name associated with the address, None when there is none
    name = attr.ib(default=None)
248 248
249 249 def _ismailmaplineinvalid(names, emails):
250 250 '''Returns True if the parsed names and emails
251 251 in a mailmap entry are invalid.
252 252
253 253 >>> # No names or emails fails
254 254 >>> names, emails = [], []
255 255 >>> _ismailmaplineinvalid(names, emails)
256 256 True
257 257 >>> # Only one email fails
258 258 >>> emails = [b'email@email.com']
259 259 >>> _ismailmaplineinvalid(names, emails)
260 260 True
261 261 >>> # One email and one name passes
262 262 >>> names = [b'Test Name']
263 263 >>> _ismailmaplineinvalid(names, emails)
264 264 False
265 265 >>> # No names but two emails passes
266 266 >>> names = []
267 267 >>> emails = [b'proper@email.com', b'commit@email.com']
268 268 >>> _ismailmaplineinvalid(names, emails)
269 269 False
270 270 '''
271 271 return not emails or not names and len(emails) < 2
272 272
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict whose keys and values are both mailmapping objects:
    the key holds the commit email (and commit name, if the line gave
    one), the value holds the proper email and name. ``None`` input yields
    an empty dict. Comment lines and lines that do not form a valid entry
    are skipped.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    if mailmapcontent is None:
        return {}

    mapping = {}
    for rawline in mailmapcontent.splitlines():
        # A line that is nothing but a comment carries no entry
        if rawline.lstrip().startswith('#'):
            continue

        names = []
        emails = []
        words = []  # words of the name currently being collected

        for token in rawline.split():
            if token.startswith('#'):
                # The rest of the line is a comment
                break

            if not (token.startswith('<') and token.endswith('>')):
                # Just another word of the name
                words.append(token)
                continue

            # An email: record it without the "<>" and close off the name
            # gathered in front of it, if any
            emails.append(token[1:-1])
            if words:
                names.append(' '.join(words))
                words = []

            # Nothing after a second email fits the .mailmap spec
            if len(emails) > 1:
                break

        # At least one email is required, together with either a name or
        # a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None

        key = mailmapping(email=emails[-1], name=commitname)
        mapping[key] = mailmapping(email=emails[0], name=propername)

    return mapping
354 354
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup is tried with the commit name and email first, then with
    the email alone. Whatever the matching entry leaves unset is taken
    from the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Nothing to do for a malformed author field or an empty mailmap
    if not (isauthorwellformed(author) and mailmap):
        return author

    # The author field expressed as a mailmap key
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Preferred key: commit name and email together
        proper = mailmap[commit]
    except KeyError:
        # Fallback key: the email alone. A separate key object is built so
        # the original commit fields stay available below.
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Fill in whatever the mailmap entry did not provide
    name = proper.name or commit.name
    address = proper.email or commit.email
    return '%s <%s>' % (name, address)
401 401
# "Name <local@domain>": a non-empty name without '<', one whitespace
# character, then an angle-bracketed address containing an '@'
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    if _correctauthorformat.match(author):
        return True
    return False
424 424
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim(), with '...' passed as the
    ellipsis marker.
    """
    return encoding.trim(text, maxlength, ellipsis='...')
428 428
def escapestr(s):
    """Return the byte string ``s`` in backslash-escaped form."""
    # codecs.escape_encode is the function underlying
    # s.encode('string_escape'); it is called directly for Python 3
    # compatibility. Only the first item of its result is wanted.
    result = codecs.escape_encode(s)
    return result[0]
433 433
def unescapestr(s):
    """Return ``s`` with backslash escapes decoded (reverse of escapestr)."""
    # only the first item of the codecs.escape_decode result is wanted
    result = codecs.escape_decode(s)
    return result[0]
436 436
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    If the direct conversion raises UnicodeEncodeError, the object is
    converted via str() and the local encoding instead.
    """
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        converted = pycompat.bytestr(localized)
    return converted
445 445
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed
    into single ones."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
449 449
450 450 # delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Return a width-aware TextWrapper instance built from ``kwargs``.

    The wrapper class is defined on the first call. That call also rebinds
    the module-level name ``_MBTextWrapper`` to the class itself, so later
    calls through that name construct instances directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires a user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr into (head, tail): head is the longest prefix
            # whose accumulated column width stays within space_left.
            l = 0
            colwidth = encoding.ucolwidth
            for i in pycompat.xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # the whole string fits
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # at least one column is always considered available
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put as much of the chunk as fits on the current line and
                # leave the remainder on the stack
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # not allowed to break the word: give it a line of its own
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # replace this factory function with the class for subsequent calls
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
553 553
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` display columns.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three byte strings are decoded with the configured
    encoding and encoding mode, wrapped by the width-aware text wrapper,
    and the result is encoded back.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)
    uline = line.decode(codec, errors)
    uinit = initindent.decode(codec, errors)
    uhang = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=uinit,
                             subsequent_indent=uhang)
    wrapped = wrapper.fill(uline)
    return wrapped.encode(codec)
569 569
# spellings accepted by parsebool(), all lowercase
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns None.
    """
    key = s.lower()
    return _booleans.get(key)
580 580
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3 the input is decoded as latin1 before being evaluated.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now