##// END OF EJS Templates
stringutil: make b prefixes on string output optional...
Augie Fackler -
r37768:f7194c92 default
parent child Browse files
Show More
@@ -1,517 +1,521 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16
17 17 from ..i18n import _
18 18 from ..thirdparty import attr
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 )
25 25
def pprint(o, bprefix=True):
    """Render ``o`` as a string resembling a Python literal.

    Supported inputs are bytes, bytearray, list, dict, bool, int, float
    and None. Lists and dicts are rendered recursively; dict items are
    emitted in sorted order.

    ``bprefix`` selects whether bytes values are written with a leading
    ``b``; it is forwarded to the members of lists and dicts.

    Any other type raises error.ProgrammingError.
    """
    if isinstance(o, bytes):
        template = "b'%s'" if bprefix else "'%s'"
        return template % escapestr(o)
    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # unless the value is coerced to bytes first.
        return "bytearray['%s']" % escapestr(bytes(o))
    if isinstance(o, list):
        members = (pprint(item, bprefix=bprefix) for item in o)
        return '[%s]' % (b', '.join(members))
    if isinstance(o, dict):
        pairs = (
            '%s: %s' % (pprint(key, bprefix=bprefix),
                        pprint(value, bprefix=bprefix))
            for key, value in sorted(o.items()))
        return '{%s}' % (b', '.join(pairs))
    # bool is a subclass of int, so it must be checked before int
    if isinstance(o, bool):
        if o:
            return b'True'
        return b'False'
    if isinstance(o, int):
        return '%d' % o
    if isinstance(o, float):
        return '%f' % o
    if o is None:
        return b'None'
    raise error.ProgrammingError('do not know how to format %r' % o)
49 53
def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character."""
    if not s:
        # empty string / None: nothing to inspect
        return False
    return '\0' in s
53 57
def stringmatcher(pattern, casesensitive=True):
    """
    Build a matcher from a string that may carry a 're:' or 'literal:'
    prefix.

    Returns a ``(kind, pattern, matcher)`` tuple where ``kind`` is 're' or
    'literal', ``pattern`` is the input with the recognized prefix removed
    and ``matcher`` is a callable taking the string to test. A missing or
    unknown prefix is treated as a literal match. An invalid regular
    expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        regexsource = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(regexsource, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', regexsource, compiled.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # case-insensitive literal: compare the lowercased forms
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
112 116
def shortuser(user):
    """Return a short representation of a user name or email address.

    The value is cut at the first '@', then everything up to and including
    the first '<' is dropped, then it is cut at the first space and finally
    at the first '.'.
    """
    user = user.partition('@')[0]
    _before, bracket, after = user.partition('<')
    if bracket:
        user = after
    user = user.partition(' ')[0]
    return user.partition('.')[0]
128 132
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' onwards is removed, then everything up to
    and including the first '<' of what remains.
    """
    localpart = user.split('@', 1)[0]
    # with no '<' present the split yields the whole string as last item
    return localpart.split('<', 1)[-1]
138 142
def email(author):
    '''Return the email part of an author string.

    This is the text after the first '<' (or from the start when there is
    none) up to the first '>' (or to the end when there is none).
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end < 0:
        return author[start:]
    return author[start:end]
145 149
def person(author):
    """Return the name that precedes an email address,
    interpreting it as per RFC 5322.

    A string without '@' is returned unchanged. When a '<' is present the
    text before it is used, stripped of surrounding spaces and double
    quotes, with escaped quotes unescaped. Otherwise the part before '@' is
    used with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author

    bracket = author.find('<')
    if bracket == -1:
        # bare address: derive a name from the local part
        localpart = author[:author.find('@')]
        return localpart.replace('.', ' ')

    displayname = author[:bracket].strip(' "')
    return displayname.replace('\\"', '"')
172 176
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file.

    Instances are created with hash=True so they can serve as dictionary
    keys.'''
    # email address; has no default, so it must always be supplied
    email = attr.ib()
    # user name; defaults to None when not supplied
    name = attr.ib(default=None)
179 183
180 184 def _ismailmaplineinvalid(names, emails):
181 185 '''Returns True if the parsed names and emails
182 186 in a mailmap entry are invalid.
183 187
184 188 >>> # No names or emails fails
185 189 >>> names, emails = [], []
186 190 >>> _ismailmaplineinvalid(names, emails)
187 191 True
188 192 >>> # Only one email fails
189 193 >>> emails = [b'email@email.com']
190 194 >>> _ismailmaplineinvalid(names, emails)
191 195 True
192 196 >>> # One email and one name passes
193 197 >>> names = [b'Test Name']
194 198 >>> _ismailmaplineinvalid(names, emails)
195 199 False
196 200 >>> # No names but two emails passes
197 201 >>> names = []
198 202 >>> emails = [b'proper@email.com', b'commit@email.com']
199 203 >>> _ismailmaplineinvalid(names, emails)
200 204 False
201 205 '''
202 206 return not emails or not names and len(emails) < 2
203 207
def parsemailmap(mailmapcontent):
    """Parse data in the .mailmap format.

    Returns a dict whose keys are mailmapping objects describing the
    commit identity (last email on the line, plus the second name when two
    names are given) and whose values are mailmapping objects describing
    the proper identity (first email and first name on the line). Comment
    lines and lines that do not form a valid entry are skipped. ``None``
    input yields an empty dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    if mailmapcontent is None:
        return {}

    result = {}
    for rawline in mailmapcontent.splitlines():
        # Whole-line comments carry no entry
        if rawline.lstrip().startswith('#'):
            continue

        # names/emails collect what has been parsed from this line;
        # pendingwords accumulates the words of the name being read
        names = []
        emails = []
        pendingwords = []

        for token in rawline.split():
            if token.startswith('#'):
                # Trailing comment: the rest of the line is ignored
                break

            isemail = token.startswith('<') and token.endswith('>')
            if not isemail:
                # Another word of the name currently being read
                pendingwords.append(token)
                continue

            # An email closes any name collected before it
            emails.append(token[1:-1])  # slice off the "<>"
            if pendingwords:
                names.append(' '.join(pendingwords))
                pendingwords = []

            # Nothing after a second email fits the .mailmap spec
            if len(emails) > 1:
                break

        # At least one email, and either a name or a second email,
        # are required
        if _ismailmaplineinvalid(names, emails):
            continue

        commitname = names[-1] if len(names) == 2 else None
        commitkey = mailmapping(email=emails[-1], name=commitname)

        propername = names[0] if names else None
        result[commitkey] = mailmapping(email=emails[0], name=propername)

    return result
285 289
def mapname(mailmap, author):
    """Return the author field rewritten according to the mailmap, or
    the original author field when it cannot be mapped.

    The lookup first uses both the commit name and email as the key and
    falls back to the email alone. Name and email are replaced
    independently, each only when the mailmap entry provides a value.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # A malformed author field or an empty mailmap leaves nothing to do
    if not (isauthorwellformed(author) and mailmap):
        return author

    # The identity as recorded in the commit
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Most specific match: name and email together
        proper = mailmap[commit]
    except KeyError:
        # Fall back to an email-only key; a separate object is used so
        # the original commit fields stay available below
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    mappedname = proper.name or commit.name
    mappedemail = proper.email or commit.email
    return '%s <%s>' % (mappedname, mappedemail)
332 336
# One or more characters other than '<', one whitespace character, then an
# address in angle brackets that contains an '@' and no nested brackets.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    # a match object is always truthy, a failed match is None
    return bool(_correctauthorformat.match(author))
355 359
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim(), with '...' passed as the
    ellipsis marker.
    """
    marker = '...'
    return encoding.trim(text, maxlength, ellipsis=marker)
359 363
def escapestr(s):
    """Return the escaped form of ``s`` as produced by
    codecs.escape_encode()."""
    # This is the function underlying s.encode('string_escape'); calling
    # it directly keeps the code working on Python 3.
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
364 368
def unescapestr(s):
    """Return ``s`` with its escape sequences decoded by
    codecs.escape_decode()."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
367 371
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first; if it raises UnicodeEncodeError the
    object is converted via str() and encoding.strtolocal() instead.
    """
    try:
        formatted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        formatted = pycompat.bytestr(localized)
    return formatted
376 380
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed
    into single ones."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
380 384
381 385 # delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Return a column-width-aware TextWrapper built from ``kwargs``.

    The wrapper class is defined inside this function. On the first call
    the module-level name ``_MBTextWrapper`` is rebound to that class, so
    later calls through the same name instantiate the class directly
    instead of defining it again.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr into (head, tail): head is the longest prefix whose
            # accumulated column width does not exceed space_left.
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # the whole string fits
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put what fits on the current line and leave the remainder
                # on top of the chunk stack
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # not allowed to break: an empty line takes the whole chunk
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class itself for subsequent calls.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
484 488
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` columns and return the encoded result.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three strings are decoded with encoding.encoding
    (using encoding.encodingmode for errors), filled by _MBTextWrapper and
    the result is encoded back with encoding.encoding.
    """
    widestindent = max(len(hangindent), len(initindent))
    if width <= widestindent:
        # adjust for weird terminal size
        width = max(78, widestindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errormode = pycompat.sysstr(encoding.encodingmode)
    uline = line.decode(codec, errormode)
    ufirst = initindent.decode(codec, errormode)
    uhang = hangindent.decode(codec, errormode)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=ufirst,
                             subsequent_indent=uhang)
    filled = wrapper.fill(uline)
    return filled.encode(codec)
500 504
# Recognized spellings of boolean values; keys are lowercase.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The lookup is case-insensitive. If s is not a valid boolean, returns
    None.
    """
    key = s.lower()
    return _booleans.get(key)
511 515
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression.

    On Python 3 the input is decoded as latin1 before being handed to
    ast.literal_eval(); otherwise it is passed through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now