##// END OF EJS Templates
stringutil: teach pprint how to format None...
Augie Fackler -
r37767:73d0a3dd default
parent child Browse files
Show More
@@ -1,515 +1,517 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16
17 17 from ..i18n import _
18 18 from ..thirdparty import attr
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 )
25 25
def pprint(o):
    """Return a pretty-printed representation of ``o``.

    Supported inputs are bytes, bytearray, list, dict, bool, int, float
    and None. Lists and dicts are formatted recursively; dict items are
    emitted in sorted order. Any other type raises
    ``error.ProgrammingError``.
    """
    if isinstance(o, bytes):
        return "b'%s'" % escapestr(o)
    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))
    if isinstance(o, list):
        items = [pprint(item) for item in o]
        return '[%s]' % (b', '.join(items))
    if isinstance(o, dict):
        pairs = ['%s: %s' % (pprint(key), pprint(value))
                 for key, value in sorted(o.items())]
        return '{%s}' % (b', '.join(pairs))
    # bool has to be tested before int because bool is a subclass of int
    if isinstance(o, bool):
        if o:
            return b'True'
        return b'False'
    if isinstance(o, int):
        return '%d' % o
    if isinstance(o, float):
        return '%f' % o
    if o is None:
        return b'None'
    raise error.ProgrammingError('do not know how to format %r' % o)
47 49
def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character.

    Empty or None input yields False.
    """
    if not s:
        return False
    return '\0' in s
51 53
def stringmatcher(pattern, casesensitive=True):
    """Build a matcher from a pattern with an optional kind prefix.

    The pattern may start with 're:' (regular expression search) or
    'literal:' (exact comparison). A missing or unknown prefix is treated
    as a literal match. Returns a tuple of the matcher kind, the pattern
    with its recognized prefix stripped, and the matcher function. An
    invalid regular expression raises ``error.ParseError``.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    # Everything else is a literal; only an explicit 'literal:' prefix is
    # stripped, unknown prefixes stay part of the pattern.
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
110 112
def shortuser(user):
    """Return a short representation of a user name or email address.

    Keeps what precedes the first '@', then what follows the first '<',
    then what precedes the first space, then what precedes the first '.'.
    """
    user = user.split('@', 1)[0]
    user = user.split('<', 1)[-1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
126 128
def emailuser(user):
    """Return the user portion of an email address.

    Text from the first '@' onwards is dropped, then anything up to and
    including the first '<' is dropped.
    """
    local = user.split('@', 1)[0]
    return local.split('<', 1)[-1]
136 138
def email(author):
    """Return the email part of an author string.

    This is the text after the first '<' (or from the start when there
    is none) up to the first '>' (or to the end when there is none).
    """
    # find() returns -1 when '<' is absent, so start becomes 0
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
143 145
def person(author):
    """Return the name that precedes an email address, interpreting the
    author field as per RFC 5322.

    Without an '@' the author is returned unchanged. With a '<', the text
    before it is stripped of spaces and double quotes and escaped quotes
    are unescaped. Otherwise the part before '@' is returned with dots
    turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    name, bracket, rest = author.partition('<')
    if bracket:
        return name.strip(' "').replace('\\"', '"')
    # bare address: use the local part, with dots as word separators
    local = author.partition('@')[0]
    return local.replace('.', ' ')
170 172
@attr.s(hash=True)
class mailmapping(object):
    """A username/email pair used as a key or value of a parsed mailmap.

    Instances are hashable (``hash=True``), so they can serve as
    dictionary keys.
    """
    # email address of the entry (required)
    email = attr.ib()
    # name of the entry; None when the entry carries an email only
    name = attr.ib(default=None)
177 179
178 180 def _ismailmaplineinvalid(names, emails):
179 181 '''Returns True if the parsed names and emails
180 182 in a mailmap entry are invalid.
181 183
182 184 >>> # No names or emails fails
183 185 >>> names, emails = [], []
184 186 >>> _ismailmaplineinvalid(names, emails)
185 187 True
186 188 >>> # Only one email fails
187 189 >>> emails = [b'email@email.com']
188 190 >>> _ismailmaplineinvalid(names, emails)
189 191 True
190 192 >>> # One email and one name passes
191 193 >>> names = [b'Test Name']
192 194 >>> _ismailmaplineinvalid(names, emails)
193 195 False
194 196 >>> # No names but two emails passes
195 197 >>> names = []
196 198 >>> emails = [b'proper@email.com', b'commit@email.com']
197 199 >>> _ismailmaplineinvalid(names, emails)
198 200 False
199 201 '''
200 202 return not emails or not names and len(emails) < 2
201 203
def parsemailmap(mailmapcontent):
    """Parse data in the .mailmap format into a dictionary.

    Keys are the ``mailmapping`` entries to look commits up by (commit
    email, plus commit name when the line has two names); values are the
    ``mailmapping`` entries holding the proper name and email. ``None``
    input yields an empty dictionary. Comment lines and lines that do not
    form a valid entry are skipped.

    >>> mmdata = b"\\n".join([
    ...   b'# Comment',
    ...   b'Name <commit1@email.xx>',
    ...   b'<name@email.xx> <commit2@email.xx>',
    ...   b'Name <proper@email.xx> <commit3@email.xx>',
    ...   b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...    print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...    print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Whole-line comments carry no mapping
        if line.lstrip().startswith('#'):
            continue

        # names/emails collect what has been parsed from this line;
        # namebuilder accumulates the words of the name being read
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith('#'):
                # A trailing comment ends the entry
                break

            if not (element.startswith('<') and element.endswith('>')):
                # Another word of the name currently being read
                namebuilder.append(element)
                continue

            # An email: record it without the surrounding "<>" and close
            # off the name that preceded it, if any
            emails.append(element[1:-1])
            if namebuilder:
                names.append(' '.join(namebuilder))
                namebuilder = []

            # Nothing after a second email fits the .mailmap spec
            if len(emails) > 1:
                break

        # At least one email is required, along with either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The last email identifies the commit; a commit name exists only
        # when the line carried two names
        commitname = names[-1] if len(names) == 2 else None
        mailmapkey = mailmapping(email=emails[-1], name=commitname)

        propername = names[0] if names else None
        mailmap[mailmapkey] = mailmapping(email=emails[0], name=propername)

    return mailmap
283 285
def mapname(mailmap, author):
    """Return the author field rewritten according to ``mailmap``, or the
    original author field when it is malformed or the mailmap is empty.

    The lookup first uses both the commit name and email as the key, then
    falls back to the email alone. Name and email missing from the
    matched entry are taken from the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Nothing to map if the author field is malformed or there is no
    # mailmap data
    wellformed = isauthorwellformed(author)
    if not (wellformed and mailmap):
        return author

    # The commit's author expressed as a mailmap entry
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Most specific key first: commit name and email together
        proper = mailmap[commit]
    except KeyError:
        # Fall back to an email-only key; a separate object is used so
        # the original commit fields stay available below
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Fill in whatever the matched entry does not provide
    name = proper.name or commit.name
    address = proper.email or commit.email
    return '%s <%s>' % (name, address)
330 332
# "Name <user@host>": a non-empty name, one whitespace character, then an
# address in angle brackets that contains an '@'
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    matched = _correctauthorformat.match(author)
    return matched is not None
353 355
def ellipsis(text, maxlength=400):
    """Trim ``text`` to at most ``maxlength`` (default: 400) display
    columns, passing '...' as the ellipsis marker to ``encoding.trim``."""
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
357 359
def escapestr(s):
    """Return the byte string ``s`` with special bytes backslash-escaped."""
    # s.encode('string_escape') does not exist on Python 3, so the
    # underlying codec function is called directly
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
362 364
def unescapestr(s):
    """Return ``s`` with backslash escape sequences decoded (the inverse
    of escapestr)."""
    unescaped, _consumed = codecs.escape_decode(s)
    return unescaped
365 367
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    If the direct conversion raises UnicodeEncodeError, the object is
    converted through ``str()`` and ``encoding.strtolocal`` instead.
    """
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        return pycompat.bytestr(localized)
374 376
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed
    into single ones."""
    rendered = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return rendered.replace(b'\\\\', b'\\')
378 380
# the wrapper class is defined lazily, on first use
def _MBTextWrapper(**kwargs):
    """Return a display-width-aware TextWrapper built from ``kwargs``.

    The first call defines the wrapper class and rebinds the module-level
    name ``_MBTextWrapper`` to it, so later calls instantiate the class
    directly instead of defining it again.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` so that the first part fits in
            ``space_left`` columns; returns (head, tail)."""
            l = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                l += colwidth(char)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                chunk = reversed_chunks[-1]
                cut, res = self._cutdown(chunk, space_left)
                if not cut and not cur_line:
                    # The first character alone is wider than the whole
                    # line. Emit it anyway: with an empty cut on an empty
                    # line, _wrap_chunks would never consume the chunk
                    # and would loop forever.
                    cut, res = chunk[:1], chunk[1:]
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class itself so it is built only once
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
482 484
def wrap(line, width, initindent='', hangindent=''):
    """Wrap the byte string ``line`` to ``width`` display columns.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three are decoded with ``encoding.encoding`` and
    ``encoding.encodingmode`` before wrapping, and the filled result is
    encoded back with ``encoding.encoding``.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errmode)
    initindent = initindent.decode(codec, errmode)
    hangindent = hangindent.decode(codec, errmode)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(codec)
498 500
# recognized spellings of boolean values, keyed by lowercased text
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse ``s`` into a boolean, ignoring case.

    Returns None if ``s`` is not a recognized boolean spelling.
    """
    key = s.lower()
    return _booleans.get(key)
509 511
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression.

    When ``pycompat.ispy3`` is set, ``s`` is first decoded as latin1 and
    the decoded text is evaluated; otherwise ``s`` is evaluated as is.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now