##// END OF EJS Templates
stringutil: teach pprint about sets...
Augie Fackler -
r39086:2aebe138 default
parent child Browse files
Show More
@@ -1,583 +1,586 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16
17 17 from ..i18n import _
18 18 from ..thirdparty import attr
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 )
25 25
# Characters that get a backslash prepended by reescape(). The list was
# pulled from https://bugs.python.org/issue29995, which was part of
# Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')

# Translation table keyed by character ordinal; each value is the same
# character preceded by a backslash, decoded as latin1 so the table can be
# handed to unicode.translate().
_regexescapemap = {
    ord(char): (b'\\' + char).decode('latin1')
    for char in _respecial
}
30 30
def reescape(pat):
    """Drop-in replacement for re.escape.

    ``pat`` may be a bytes or a unicode pattern; the escaped pattern is
    returned in the same type it was given in.
    """
    # NOTE: the escaping is deliberately done on unicode, because it is only
    # possible with unicode.translate, not bytes.translate. Bytes patterns
    # are round-tripped through latin1 to get there and back.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    escaped = pat.decode('latin1').translate(_regexescapemap)
    return escaped.encode('latin1')
44 44
def pprint(o, bprefix=False):
    """Pretty print an object.

    bytes are rendered as an escaped, quoted literal (with a ``b`` prefix
    when ``bprefix`` is true) and bytearrays get their own wrapper. Lists,
    tuples, dicts and sets are rendered recursively; dict items and set
    members are emitted in sorted order. Any other object is rendered with
    pycompat.byterepr().
    """
    def _render(item):
        # recurse while keeping the caller's choice of prefix
        return pprint(item, bprefix=bprefix)

    if isinstance(o, bytes):
        escaped = escapestr(o)
        if bprefix:
            return "b'%s'" % escaped
        return "'%s'" % escaped
    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))
    if isinstance(o, list):
        return '[%s]' % (b', '.join(_render(a) for a in o))
    if isinstance(o, dict):
        pairs = ('%s: %s' % (_render(k), _render(v))
                 for k, v in sorted(o.items()))
        return '{%s}' % (b', '.join(pairs))
    if isinstance(o, set):
        return 'set([%s])' % (b', '.join(_render(k) for k in sorted(o)))
    if isinstance(o, tuple):
        return '(%s)' % (b', '.join(_render(a) for a in o))
    return pycompat.byterepr(o)
66 69
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is cut in front of every '<' (backtracking to
    the start of a preceding ' field=' when the '<' directly follows '=').
    Each piece goes on its own line, indented by the number of '<' not yet
    closed by '>' before the piece starts.
    """
    text = pycompat.byterepr(o)
    pieces = []
    # start: where the current piece begins
    # mark:  position of the '<' found by the previous iteration
    start = mark = 0
    while start < len(text):
        cut = -1
        bracket = text.find('<', mark + 1)
        if bracket < 0:
            # no further '<': the last piece runs to the end of the text
            bracket = len(text)
        elif bracket > mark + 1 and text.startswith('=', bracket - 1):
            # backtrack for ' field=<'
            cut = text.rfind(' ', mark + 1, bracket - 1)
        if cut < 0:
            cut = bracket
        else:
            cut += 1  # skip ' '
        depth = text.count('<', 0, start) - text.count('>', 0, start)
        assert depth >= 0
        pieces.append((depth, text[start:cut].rstrip()))
        start, mark = cut, bracket
    # NOTE(review): the indent unit is reproduced exactly as it appears in
    # the reviewed copy, whose viewer collapses runs of whitespace; confirm
    # the intended width against the repository.
    return '\n'.join(' ' * depth + chunk for depth, chunk in pieces)
92 95
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty string.
    """
    if r is None:
        return ''
    if isinstance(r, tuple):
        # first item is the format, the remaining items are its arguments
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pycompat.byterepr(r)
115 118
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it is non-empty and contains a NUL
    character.
    """
    if not s:
        return False
    return '\0' in s
119 122
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    The returned tuple is ``(kind, pattern, matcher)`` where ``kind`` is
    're' or 'literal', ``pattern`` is the input with a recognized prefix
    removed, and ``matcher`` is a callable taking the string to test.
    When ``casesensitive`` is false, regexes are compiled with the
    ignore-case flag and literals are compared after encoding.lower().

    Raises error.ParseError if a 're:' pattern is not a valid regular
    expression.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0
        if not casesensitive:
            flags = remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # Interpolate the exception via forcebytestr() instead of the
            # exception object itself, so the message is assembled from
            # string data regardless of the Python version.
            raise error.ParseError(_('invalid regular expression: %s')
                                   % forcebytestr(e))
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    # exact comparison against the (prefix-stripped) pattern
    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
178 181
def shortuser(user):
    """Return a short representation of a user name or email address.

    The value is narrowed in four steps: keep what precedes the first '@',
    drop everything up to and including the first '<', then keep what
    precedes the first space and finally what precedes the first '.'.
    """
    user = user.split('@', 1)[0]
    before, bracket, after = user.partition('<')
    if bracket:
        user = after
    user = user.split(' ', 1)[0]
    return user.split('.', 1)[0]
194 197
def emailuser(user):
    """Return the user portion of an email address.

    This is the text before the first '@', with anything up to and
    including a '<' removed from its front.
    """
    local = user.split('@', 1)[0]
    before, bracket, after = local.partition('<')
    if bracket:
        return after
    return local
204 207
def email(author):
    '''get email of author.

    Returns the text following the first '<' (or the whole string when
    there is none), up to but excluding the first '>' if one is present.
    '''
    end = author.find('>')
    start = author.find('<') + 1
    if end < 0:
        # no closing bracket: take everything to the end
        return author[start:]
    return author[start:end]
211 214
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    A string without '@' is returned unchanged. When there is no '<', the
    part before '@' is used with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    name, bracket, _rest = author.partition('<')
    if bracket:
        # trim surrounding blanks/quotes, then unescape embedded quotes
        return name.strip(' "').replace('\\"', '"')
    local = author[:author.find('@')]
    return local.replace('.', ' ')
238 241
@attr.s(hash=True)
class mailmapping(object):
    """Represents a username/email key or value in
    a mailmap file

    Declared hashable so that instances can serve as dictionary keys.
    """
    # email part of the entry (required)
    email = attr.ib()
    # name part of the entry; defaults to None when not given
    name = attr.ib(default=None)
245 248
246 249 def _ismailmaplineinvalid(names, emails):
247 250 '''Returns True if the parsed names and emails
248 251 in a mailmap entry are invalid.
249 252
250 253 >>> # No names or emails fails
251 254 >>> names, emails = [], []
252 255 >>> _ismailmaplineinvalid(names, emails)
253 256 True
254 257 >>> # Only one email fails
255 258 >>> emails = [b'email@email.com']
256 259 >>> _ismailmaplineinvalid(names, emails)
257 260 True
258 261 >>> # One email and one name passes
259 262 >>> names = [b'Test Name']
260 263 >>> _ismailmaplineinvalid(names, emails)
261 264 False
262 265 >>> # No names but two emails passes
263 266 >>> names = []
264 267 >>> emails = [b'proper@email.com', b'commit@email.com']
265 268 >>> _ismailmaplineinvalid(names, emails)
266 269 False
267 270 '''
268 271 return not emails or not names and len(emails) < 2
269 272
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit-side mailmapping (last email of the
    line, plus the second name when two names are present) to the proper
    mailmapping (first email and first name of the line). ``None`` input
    gives an empty dict; comment lines and lines that do not form a valid
    entry are skipped.

    >>> mmdata = b"\\n".join([
    ...   b'# Comment',
    ...   b'Name <commit1@email.xx>',
    ...   b'<name@email.xx> <commit2@email.xx>',
    ...   b'Name <proper@email.xx> <commit3@email.xx>',
    ...   b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...    print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...    print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}
    if mailmapcontent is None:
        return mailmap

    for rawline in mailmapcontent.splitlines():
        # A line that is entirely a comment carries no mapping
        if rawline.lstrip().startswith('#'):
            continue

        names = []   # completed names, in order of appearance
        emails = []  # emails with the "<>" sliced off
        words = []   # words of the name currently being collected

        for token in rawline.split():
            if token.startswith('#'):
                # a trailing comment ends the useful part of the line
                break

            if not (token.startswith('<') and token.endswith('>')):
                # another word of the name in front of the next email
                words.append(token)
                continue

            # An email: record it and finalize the name collected so far
            emails.append(token[1:-1])
            if words:
                names.append(' '.join(words))
                words = []

            # Anything after a second email does not fit the .mailmap spec
            if len(emails) > 1:
                break

        # At least one email is required, along with either a name or a
        # second email
        if _ismailmaplineinvalid(names, emails):
            continue

        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None

        commitside = mailmapping(email=emails[-1], name=commitname)
        mailmap[commitside] = mailmapping(email=emails[0], name=propername)

    return mailmap
351 354
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup first uses both name and email of the author, then falls
    back to the email alone. Parts missing from the matched entry are
    filled in from the original author.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Nothing to map if the author is malformed or the mailmap is empty
    if not isauthorwellformed(author) or not mailmap:
        return author

    commit = mailmapping(name=person(author), email=email(author))

    try:
        # most specific key first: commit name and email together
        proper = mailmap[commit]
    except KeyError:
        # fall back to a key made of the email only; an entry with no
        # fields set stands in when that lookup misses as well
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    propername = proper.name or commit.name
    properemail = proper.email or commit.email
    return '%s <%s>' % (propername, properemail)
398 401
# Expected shape of an author field: at least one character other than '<',
# a whitespace character, then an address enclosed in '<' and '>' that
# contains '@' and no further angle brackets.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    found = _correctauthorformat.match(author)
    return bool(found)
421 424
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim(), with '...' passed as the
    ellipsis marker.
    """
    marker = '...'
    return encoding.trim(text, maxlength, ellipsis=marker)
425 428
def escapestr(s):
    """Return ``s`` with special characters backslash-escaped."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility; it returns a pair whose first item is the
    # escaped data
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
430 433
def unescapestr(s):
    """Return ``s`` with backslash escapes decoded; inverse of escapestr()."""
    # escape_decode() returns a pair whose first item is the decoded data
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
433 436
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first; if that raises UnicodeEncodeError,
    str(obj) is converted with encoding.strtolocal() and formatted instead.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localtext = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localtext)
    return result
442 445
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    rendered = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return rendered.replace(b'\\\\', b'\\')
446 449
# The wrapper class is defined lazily: the first call builds it and rebinds
# this module-level name to the class itself.
def _MBTextWrapper(**kwargs):
    """Return a column-width-aware TextWrapper built from ``kwargs``.

    On the first call the wrapper class is created and the global name
    ``_MBTextWrapper`` is replaced by that class, so later calls through
    the name instantiate the class directly.
    """
    global _MBTextWrapper

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` in two at the first character whose
            cumulative column width exceeds ``space_left``."""
            colwidth = encoding.ucolwidth
            used = 0
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:pos], ucstr[pos:])
            # everything fits: nothing is left over
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk that is wider than a whole line."""
            # always allow at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put what fits on this line, leave the rest on the stack
                head, tail = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = tail
            elif not cur_line:
                # unbreakable word on an empty line: take it whole
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Group ``chunks`` into lines no wider than self.width columns."""
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Reversed, so that chunks can be popped cheaply off the end
            chunks.reverse()

            while chunks:
                # Chunks of the line being assembled and their total width
                cur_line = []
                cur_len = 0

                # The first line and the following ones use different
                # prefixes
                indent = (self.subsequent_indent if lines
                          else self.initial_indent)

                # Columns available on this line
                width = self.width - len(indent)

                # Leading whitespace is dropped, except at the very
                # beginning of the text (i.e. no lines started yet)
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                # Take chunks for as long as they fit on the line
                while chunks:
                    chunkwidth = colwidth(chunks[-1])
                    if cur_len + chunkwidth > width:
                        break
                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The next chunk is too big to fit on *any* line (not just
                # this one)
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # Trailing all-whitespace chunk is dropped
                if (self.drop_whitespace and cur_line
                    and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Store the finished line in the return value
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    _MBTextWrapper = tw
    return tw(**kwargs)
550 553
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` columns and return the encoded result.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three strings are decoded with encoding.encoding
    before wrapping, and the filled text is encoded back afterwards.
    """
    def _decode(data):
        return data.decode(pycompat.sysstr(encoding.encoding),
                           pycompat.sysstr(encoding.encodingmode))

    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    text = _decode(line)
    first = _decode(initindent)
    rest = _decode(hangindent)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=first,
                             subsequent_indent=rest)
    filled = wrapper.fill(text)
    return filled.encode(pycompat.sysstr(encoding.encoding))
566 569
# Accepted spellings of boolean values; looked up in lower case.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns None.
    """
    key = s.lower()
    return _booleans.get(key)
577 580
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3 the input is decoded as latin1 before being handed to
    ast.literal_eval(); otherwise it is passed through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now