##// END OF EJS Templates
stringutil: make b prefixes on string output optional...
Augie Fackler -
r37768:f7194c92 default
parent child Browse files
Show More
@@ -1,517 +1,521 b''
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import ast
12 import ast
13 import codecs
13 import codecs
14 import re as remod
14 import re as remod
15 import textwrap
15 import textwrap
16
16
17 from ..i18n import _
17 from ..i18n import _
18 from ..thirdparty import attr
18 from ..thirdparty import attr
19
19
20 from .. import (
20 from .. import (
21 encoding,
21 encoding,
22 error,
22 error,
23 pycompat,
23 pycompat,
24 )
24 )
25
25
def pprint(o, bprefix=True):
    """Pretty print an object.

    Handles bytes, bytearray, list, dict, bool, int, float and None.
    Lists and dicts are formatted recursively; dict items are emitted in
    sorted order.

    ``bprefix`` selects how bytes values are rendered: with a leading
    ``b`` (``b'foo'``) when true, without it (``'foo'``) when false. The
    flag is forwarded to every nested list member, dict key and dict value.

    Raises error.ProgrammingError for any other type of object.
    """
    if isinstance(o, bytes):
        if bprefix:
            return "b'%s'" % escapestr(o)
        return "'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        return "bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        return '[%s]' % (b', '.join(pprint(a, bprefix=bprefix) for a in o))
    elif isinstance(o, dict):
        return '{%s}' % (b', '.join(
            '%s: %s' % (pprint(k, bprefix=bprefix),
                        pprint(v, bprefix=bprefix))
            for k, v in sorted(o.items())))
    elif isinstance(o, bool):
        # must be tested before int, since bool is a subclass of int
        return b'True' if o else b'False'
    elif isinstance(o, int):
        return '%d' % o
    elif isinstance(o, float):
        return '%f' % o
    elif o is None:
        return b'None'
    else:
        raise error.ProgrammingError('do not know how to format %r' % o)
49
53
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it contains a NUL character. Empty
    strings and None are never binary.
    """
    return bool(s and '\0' in s)
53
57
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    The returned pattern has the recognized prefix stripped. For 're'
    the matcher is the compiled regex's ``search``; for 'literal' it is
    an equality test (compared through encoding.lower() on both sides
    when casesensitive is false). An invalid regular expression raises
    error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    # Compare with ``==`` rather than handing out ``pattern.__eq__``: the
    # bound method returns NotImplemented (which is truthy) when the operand
    # types differ, so a mismatching value could be reported as a match.
    match = lambda s: pattern == s

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
112
116
def shortuser(user):
    """Return a short representation of a user name or email address.

    The value is narrowed in four steps, each applied to the result of
    the previous one: drop everything from the first '@', drop everything
    up to and including the first '<', drop everything from the first
    space, and finally drop everything from the first '.'.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user
128
132
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' onwards is dropped, then everything up
    to and including the first '<' that remains. Input without either
    character is returned unchanged.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user
138
142
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'. Without a
    '<' the slice starts at the beginning of the string; without a '>'
    it runs to the end of the string.
    '''
    r = author.find('>')
    if r == -1:
        # no closing bracket: slice to the end instead of dropping the
        # last character
        r = None
    # find() returns -1 when '<' is absent, so "+ 1" yields a start of 0
    return author[author.find('<') + 1:r]
145
149
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # no email address at all: the whole field is the name
    if '@' not in author:
        return author
    f = author.find('<')
    if f != -1:
        # "Name <email>": trim surrounding spaces and quotes, then
        # unescape any embedded \" sequences
        return author[:f].strip(' "').replace('\\"', '"')
    # bare address: use the local part, with dots turned into spaces
    f = author.find('@')
    return author[:f].replace('.', ' ')
172
176
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''
    # hash=True makes instances hashable so they can serve as dict keys

    # email address of the entry (no default, must be supplied)
    email = attr.ib()
    # name attached to the address; None when the entry carries no name
    name = attr.ib(default=None)
179
183
180 def _ismailmaplineinvalid(names, emails):
184 def _ismailmaplineinvalid(names, emails):
181 '''Returns True if the parsed names and emails
185 '''Returns True if the parsed names and emails
182 in a mailmap entry are invalid.
186 in a mailmap entry are invalid.
183
187
184 >>> # No names or emails fails
188 >>> # No names or emails fails
185 >>> names, emails = [], []
189 >>> names, emails = [], []
186 >>> _ismailmaplineinvalid(names, emails)
190 >>> _ismailmaplineinvalid(names, emails)
187 True
191 True
188 >>> # Only one email fails
192 >>> # Only one email fails
189 >>> emails = [b'email@email.com']
193 >>> emails = [b'email@email.com']
190 >>> _ismailmaplineinvalid(names, emails)
194 >>> _ismailmaplineinvalid(names, emails)
191 True
195 True
192 >>> # One email and one name passes
196 >>> # One email and one name passes
193 >>> names = [b'Test Name']
197 >>> names = [b'Test Name']
194 >>> _ismailmaplineinvalid(names, emails)
198 >>> _ismailmaplineinvalid(names, emails)
195 False
199 False
196 >>> # No names but two emails passes
200 >>> # No names but two emails passes
197 >>> names = []
201 >>> names = []
198 >>> emails = [b'proper@email.com', b'commit@email.com']
202 >>> emails = [b'proper@email.com', b'commit@email.com']
199 >>> _ismailmaplineinvalid(names, emails)
203 >>> _ismailmaplineinvalid(names, emails)
200 False
204 False
201 '''
205 '''
202 return not emails or not names and len(emails) < 2
206 return not emails or not names and len(emails) < 2
203
207
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict of mailmapping -> mailmapping. Each key is built from
    the last email on a line (plus the second name when the line carries
    two names); each value is built from the first email and, if present,
    the first name. Comment lines and lines for which
    _ismailmaplineinvalid() returns True are skipped. None yields an
    empty dict.

    >>> mmdata = b"\\n".join([
    ...   b'# Comment',
    ...   b'Name <commit1@email.xx>',
    ...   b'<name@email.xx> <commit2@email.xx>',
    ...   b'Name <proper@email.xx> <commit3@email.xx>',
    ...   b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith('#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # namebuilder holds the words in a persons name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith('#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith('<') and element.endswith('>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap
285
289
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a dict as produced by parsemailmap(). The lookup is
    tried first with both the name and email of ``author``, then with the
    email alone. Whatever the matching entry leaves unset (name or email)
    is taken from ``author`` itself.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in
    return '%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
332
336
# One or more non-'<' characters, a whitespace character, then an
# angle-bracketed part that contains an '@' and no nested brackets.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    return _correctauthorformat.match(author) is not None
355
359
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to encoding.trim(), passing '...' as the ellipsis marker.
    """
    return encoding.trim(text, maxlength, ellipsis='...')
359
363
def escapestr(s):
    """Return ``s`` with special characters backslash-escaped.

    Only the escaped data is returned; the length that
    codecs.escape_encode() reports alongside it is discarded.
    """
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]
364
368
def unescapestr(s):
    """Return ``s`` with backslash escape sequences decoded.

    Uses codecs.escape_decode() and returns only the decoded data,
    discarding the consumed-length element of its result.
    """
    return codecs.escape_decode(s)[0]
367
371
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first; if it raises UnicodeEncodeError
    the object is converted through str() and encoding.strtolocal()
    instead, which may lose information.
    """
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
376
380
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed.

    ``s`` is converted with pycompat.bytestr() and formatted with
    pycompat.byterepr(); every pair of backslashes in the result is then
    replaced by a single one.
    """
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
380
384
381 # delay import of textwrap
385 # delay import of textwrap
382 def _MBTextWrapper(**kwargs):
386 def _MBTextWrapper(**kwargs):
383 class tw(textwrap.TextWrapper):
387 class tw(textwrap.TextWrapper):
384 """
388 """
385 Extend TextWrapper for width-awareness.
389 Extend TextWrapper for width-awareness.
386
390
387 Neither number of 'bytes' in any encoding nor 'characters' is
391 Neither number of 'bytes' in any encoding nor 'characters' is
388 appropriate to calculate terminal columns for specified string.
392 appropriate to calculate terminal columns for specified string.
389
393
390 Original TextWrapper implementation uses built-in 'len()' directly,
394 Original TextWrapper implementation uses built-in 'len()' directly,
391 so overriding is needed to use width information of each characters.
395 so overriding is needed to use width information of each characters.
392
396
393 In addition, characters classified into 'ambiguous' width are
397 In addition, characters classified into 'ambiguous' width are
394 treated as wide in East Asian area, but as narrow in other.
398 treated as wide in East Asian area, but as narrow in other.
395
399
396 This requires use decision to determine width of such characters.
400 This requires use decision to determine width of such characters.
397 """
401 """
398 def _cutdown(self, ucstr, space_left):
402 def _cutdown(self, ucstr, space_left):
399 l = 0
403 l = 0
400 colwidth = encoding.ucolwidth
404 colwidth = encoding.ucolwidth
401 for i in xrange(len(ucstr)):
405 for i in xrange(len(ucstr)):
402 l += colwidth(ucstr[i])
406 l += colwidth(ucstr[i])
403 if space_left < l:
407 if space_left < l:
404 return (ucstr[:i], ucstr[i:])
408 return (ucstr[:i], ucstr[i:])
405 return ucstr, ''
409 return ucstr, ''
406
410
407 # overriding of base class
411 # overriding of base class
408 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
412 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
409 space_left = max(width - cur_len, 1)
413 space_left = max(width - cur_len, 1)
410
414
411 if self.break_long_words:
415 if self.break_long_words:
412 cut, res = self._cutdown(reversed_chunks[-1], space_left)
416 cut, res = self._cutdown(reversed_chunks[-1], space_left)
413 cur_line.append(cut)
417 cur_line.append(cut)
414 reversed_chunks[-1] = res
418 reversed_chunks[-1] = res
415 elif not cur_line:
419 elif not cur_line:
416 cur_line.append(reversed_chunks.pop())
420 cur_line.append(reversed_chunks.pop())
417
421
418 # this overriding code is imported from TextWrapper of Python 2.6
422 # this overriding code is imported from TextWrapper of Python 2.6
419 # to calculate columns of string by 'encoding.ucolwidth()'
423 # to calculate columns of string by 'encoding.ucolwidth()'
420 def _wrap_chunks(self, chunks):
424 def _wrap_chunks(self, chunks):
421 colwidth = encoding.ucolwidth
425 colwidth = encoding.ucolwidth
422
426
423 lines = []
427 lines = []
424 if self.width <= 0:
428 if self.width <= 0:
425 raise ValueError("invalid width %r (must be > 0)" % self.width)
429 raise ValueError("invalid width %r (must be > 0)" % self.width)
426
430
427 # Arrange in reverse order so items can be efficiently popped
431 # Arrange in reverse order so items can be efficiently popped
428 # from a stack of chucks.
432 # from a stack of chucks.
429 chunks.reverse()
433 chunks.reverse()
430
434
431 while chunks:
435 while chunks:
432
436
433 # Start the list of chunks that will make up the current line.
437 # Start the list of chunks that will make up the current line.
434 # cur_len is just the length of all the chunks in cur_line.
438 # cur_len is just the length of all the chunks in cur_line.
435 cur_line = []
439 cur_line = []
436 cur_len = 0
440 cur_len = 0
437
441
438 # Figure out which static string will prefix this line.
442 # Figure out which static string will prefix this line.
439 if lines:
443 if lines:
440 indent = self.subsequent_indent
444 indent = self.subsequent_indent
441 else:
445 else:
442 indent = self.initial_indent
446 indent = self.initial_indent
443
447
444 # Maximum width for this line.
448 # Maximum width for this line.
445 width = self.width - len(indent)
449 width = self.width - len(indent)
446
450
447 # First chunk on line is whitespace -- drop it, unless this
451 # First chunk on line is whitespace -- drop it, unless this
448 # is the very beginning of the text (i.e. no lines started yet).
452 # is the very beginning of the text (i.e. no lines started yet).
449 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
453 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
450 del chunks[-1]
454 del chunks[-1]
451
455
452 while chunks:
456 while chunks:
453 l = colwidth(chunks[-1])
457 l = colwidth(chunks[-1])
454
458
455 # Can at least squeeze this chunk onto the current line.
459 # Can at least squeeze this chunk onto the current line.
456 if cur_len + l <= width:
460 if cur_len + l <= width:
457 cur_line.append(chunks.pop())
461 cur_line.append(chunks.pop())
458 cur_len += l
462 cur_len += l
459
463
460 # Nope, this line is full.
464 # Nope, this line is full.
461 else:
465 else:
462 break
466 break
463
467
464 # The current line is full, and the next chunk is too big to
468 # The current line is full, and the next chunk is too big to
465 # fit on *any* line (not just this one).
469 # fit on *any* line (not just this one).
466 if chunks and colwidth(chunks[-1]) > width:
470 if chunks and colwidth(chunks[-1]) > width:
467 self._handle_long_word(chunks, cur_line, cur_len, width)
471 self._handle_long_word(chunks, cur_line, cur_len, width)
468
472
469 # If the last chunk on this line is all whitespace, drop it.
473 # If the last chunk on this line is all whitespace, drop it.
470 if (self.drop_whitespace and
474 if (self.drop_whitespace and
471 cur_line and cur_line[-1].strip() == r''):
475 cur_line and cur_line[-1].strip() == r''):
472 del cur_line[-1]
476 del cur_line[-1]
473
477
474 # Convert current line back to a string and store it in list
478 # Convert current line back to a string and store it in list
475 # of all lines (return value).
479 # of all lines (return value).
476 if cur_line:
480 if cur_line:
477 lines.append(indent + r''.join(cur_line))
481 lines.append(indent + r''.join(cur_line))
478
482
479 return lines
483 return lines
480
484
481 global _MBTextWrapper
485 global _MBTextWrapper
482 _MBTextWrapper = tw
486 _MBTextWrapper = tw
483 return tw(**kwargs)
487 return tw(**kwargs)
484
488
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width``, measuring display columns.

    ``initindent`` prefixes the first output line and ``hangindent`` every
    following one. The three strings are decoded from encoding.encoding
    (with encoding.encodingmode as the error handler), wrapped with
    _MBTextWrapper, and the result is encoded back to encoding.encoding.

    If ``width`` does not exceed the longer of the two indents, it is
    replaced by max(78, that indent length + 1).
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # codec name and error mode are the same for every conversion below
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
500
504
# spellings accepted by parsebool(), keyed in lower case
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}

def parsebool(s):
    """Parse s into a boolean.

    Matching is case-insensitive (s is lower-cased before the lookup).

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower(), None)
511
515
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    Evaluation is done by ast.literal_eval(). On Python 3 (as reported by
    pycompat.ispy3) ``s`` is decoded as latin1 first; otherwise it is
    passed through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    if pycompat.ispy3:
        return ast.literal_eval(s.decode('latin1'))
    return ast.literal_eval(s)
General Comments 0
You need to be logged in to leave comments. Login now