##// END OF EJS Templates
stringutil: improve check for failed mailmap line parsing...
Connor Sheehan -
r37263:0e7550b0 default
parent child Browse files
Show More
@@ -1,469 +1,499
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import codecs
12 import codecs
13 import re as remod
13 import re as remod
14 import textwrap
14 import textwrap
15
15
16 from ..i18n import _
16 from ..i18n import _
17 from ..thirdparty import attr
17 from ..thirdparty import attr
18
18
19 from .. import (
19 from .. import (
20 encoding,
20 encoding,
21 error,
21 error,
22 pycompat,
22 pycompat,
23 )
23 )
24
24
# Escape table keyed by pycompat.bytechr(i) for every i in 0..255. Each entry
# starts out as a ``\xNN`` hex escape; backslash, CR and LF are then given
# their conventional short spellings.
_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
_DATA_ESCAPE_MAP[b'\\'] = b'\\\\'
_DATA_ESCAPE_MAP[b'\r'] = br'\r'
_DATA_ESCAPE_MAP[b'\n'] = br'\n'
# Bytes that get rewritten: 0x00-0x08, 0x0a-0x1f, backslash and 0x7f-0xff.
# Tab (0x09) and the other bytes in 0x20-0x7e are not matched.
_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')

def escapedata(s):
    """Return ``s`` with each byte matched by ``_DATA_ESCAPE_RE`` replaced
    by its entry in ``_DATA_ESCAPE_MAP``.

    A ``bytearray`` argument is converted to ``bytes`` before substitution.
    """
    data = bytes(s) if isinstance(s, bytearray) else s

    def _escaped(match):
        # look up the replacement for the single matched byte
        return _DATA_ESCAPE_MAP[match.group(0)]

    return _DATA_ESCAPE_RE.sub(_escaped, data)
38
38
def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character.

    Empty strings and ``None`` are reported as not binary.
    """
    if not s:
        return False
    return '\0' in s
42
42
def stringmatcher(pattern, casesensitive=True):
    """
    Build a matcher from a pattern with an optional 're:' or 'literal:'
    prefix.

    Returns a ``(kind, pattern, matcher)`` tuple, where ``kind`` is 're' or
    'literal', ``pattern`` is the input with the recognised prefix removed,
    and ``matcher`` is a callable taking the string to test. A missing or
    unknown prefix means a literal match. An invalid regular expression
    raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        source = pattern[3:]
        reflags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(source, reflags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        # regex patterns match anywhere in the string (search, not match)
        return 're', source, compiled.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # case-insensitive literal: compare the lowercased forms
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
101
101
def shortuser(user):
    """Return a short representation of a user name or email address.

    The string is trimmed in four successive steps: drop everything from
    the first '@', keep what follows the first '<', drop everything from
    the first space, then drop everything from the first '.'.
    """
    # (separator, keep the part after it?) applied in this exact order
    cuts = (
        ('@', False),
        ('<', True),
        (' ', False),
        ('.', False),
    )
    for sep, keeptail in cuts:
        pos = user.find(sep)
        if pos < 0:
            continue
        user = user[pos + 1:] if keeptail else user[:pos]
    return user
117
117
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' onwards is dropped, then anything up to
    and including the first '<' of what remains.
    """
    at = user.find('@')
    local = user if at < 0 else user[:at]
    bracket = local.find('<')
    return local if bracket < 0 else local[bracket + 1:]
127
127
def email(author):
    '''Return the email part of an author string.

    This is the text between the first '<' and the first '>'. Without a
    '<' the slice starts at the beginning of the string; without a '>' it
    runs to the end.
    '''
    # find() yields -1 when '<' is absent, so start becomes 0
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
134
134
def person(author):
    """Return the name that precedes an email address,
    interpreting it as per RFC 5322

    An author without '@' is returned unchanged. When a '<' is present,
    the text before it is stripped of surrounding spaces and double quotes
    and escaped quotes are unescaped. Otherwise the part before '@' is
    used, with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    bracket = author.find('<')
    if bracket == -1:
        # bare address: use the local part as the name
        local = author[:author.find('@')]
        return local.replace('.', ' ')
    name = author[:bracket].strip(' "')
    return name.replace('\\"', '"')
161
161
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file

    Declared with hash=True so that instances can be used as dictionary
    keys.'''
    # email address of the entry; has no default, so it must be supplied
    email = attr.ib()
    # user name of the entry; None when only an email is given
    name = attr.ib(default=None)
168
168
def _ismailmaplineinvalid(names, emails):
    '''Returns True if the parsed names and emails
    in a mailmap entry are invalid.

    An entry needs at least one email, plus either at least one name
    or a second email.

    >>> # No names or emails fails
    >>> names, emails = [], []
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # Only one email fails
    >>> emails = [b'email@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # One email and one name passes
    >>> names = [b'Test Name']
    >>> _ismailmaplineinvalid(names, emails)
    False
    >>> # No names but two emails passes
    >>> names = []
    >>> emails = [b'proper@email.com', b'commit@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    False
    '''
    if not emails:
        return True
    if names:
        return False
    # no name was given, so a second email is required
    return len(emails) < 2
192
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping a ``mailmapping`` for the commit identity (the
    last email on the line, plus the commit name when two names were given)
    to a ``mailmapping`` for the proper identity (the first email and the
    first name, if any). Comment lines and lines rejected by
    ``_ismailmaplineinvalid`` are skipped. ``None`` input gives an empty
    dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    if mailmapcontent is None:
        return {}

    entries = {}

    for rawline in mailmapcontent.splitlines():

        # Whole-line comments carry no mapping
        if rawline.lstrip().startswith('#'):
            continue

        # names and emails collect what this line yields, in order;
        # words accumulates the pieces of the name currently being read
        names = []
        emails = []
        words = []

        for token in rawline.split():
            if token.startswith('#'):
                # A trailing comment ends the useful part of the line
                break

            if not (token.startswith('<') and token.endswith('>')):
                # Anything that is not an <email> is part of a name
                words.append(token)
                continue

            # An email closes off the name that preceded it, if any
            emails.append(token[1:-1]) # Slice off the "<>"
            if words:
                names.append(' '.join(words))
                words = []

            # The .mailmap format has at most two emails per line, so
            # anything after the second one is ignored
            if len(emails) > 1:
                break

        # Skip lines that did not parse into a usable entry: at least one
        # email is needed, plus either a name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None

        key = mailmapping(email=emails[-1], name=commitname)
        entries[key] = mailmapping(email=emails[0], name=propername)

    return entries
244
274
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup first tries the commit name and email together, then the
    email alone. Whichever of name and email the matching entry leaves
    unset is taken from the original author.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # A malformed author field or an empty mailmap leaves nothing to map
    if not (isauthorwellformed(author) and mailmap):
        return author

    # The identity recorded in the commit, in mailmap key form
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Most specific key first: commit name and email together
        proper = mailmap[commit]
    except KeyError:
        # Fall back to an email-only key; an all-None mapping stands in
        # when that is missing too, so the commit values are used below
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Prefer the mapped values, falling back to what the commit had
    name = proper.name or commit.name
    address = proper.email or commit.email
    return '%s <%s>' % (name, address)
291
321
# A name without '<', one whitespace character, then '<local@domain>'
# running to the end of the string.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    The name part must be non-empty, and the bracketed part must contain
    an '@' with text on both sides.

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    if _correctauthorformat.match(author) is None:
        return False
    return True
314
344
def ellipsis(text, maxlength=400):
    """Trim ``text`` to at most ``maxlength`` (default: 400) display columns.

    The trimming is delegated to encoding.trim(), with '...' passed as the
    ellipsis string.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
318
348
def escapestr(s):
    """Return ``s`` with backslash escapes applied by codecs.escape_encode."""
    # This is the function behind s.encode('string_escape'); calling it
    # directly keeps the code working on Python 3, which lacks that codec.
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
323
353
def unescapestr(s):
    """Return ``s`` with backslash escapes resolved by codecs.escape_decode."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
326
356
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first. If it raises UnicodeEncodeError,
    str(obj) is passed through encoding.strtolocal() and the result is
    converted instead.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localized)
    return result
335
365
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed
    into single ones."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
339
369
# build the width-aware wrapper class lazily, on first use
def _MBTextWrapper(**kwargs):
    """Define the width-aware TextWrapper subclass and return an instance
    of it built from ``kwargs``.

    The module-level name ``_MBTextWrapper`` is rebound to the class, so
    subsequent calls instantiate it directly instead of running this
    factory again.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr into (head, tail): head is the longest prefix
            # whose summed column width does not exceed space_left.
            used = 0
            colwidth = encoding.ucolwidth
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if used > space_left:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put as much of the long chunk as fits on this line and
                # leave the remainder on the stack
                head, rest = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = rest
            elif not cur_line:
                # unbreakable chunk: it gets a line of its own
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Reverse the list so that chunks can be popped cheaply from
            # its end, stack fashion.
            chunks.reverse()

            while chunks:

                # cur_line gathers the chunks of the line being built and
                # cur_len is their total column width.
                cur_line = []
                cur_len = 0

                # Only the first output line uses the initial indent.
                indent = self.subsequent_indent if lines else self.initial_indent

                # Columns available for text on this line.
                width = self.width - len(indent)

                # A whitespace chunk at the start of a line is dropped,
                # except at the very beginning of the text.
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Stop as soon as the next chunk no longer fits.
                    if cur_len + chunkwidth > width:
                        break

                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The next chunk is wider than a whole line, so it can
                # never fit unbroken.
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # Drop a whitespace-only chunk from the end of the line.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Join the chunks into the finished line and record it.
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
443
473
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` and return the result as an encoded string.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three strings are decoded with encoding.encoding
    (error mode encoding.encodingmode), filled by ``_MBTextWrapper``, and
    the result is encoded back with encoding.encoding.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)

    def _decoded(text):
        return text.decode(codec, errmode)

    line = _decoded(line)
    initindent = _decoded(initindent)
    hangindent = _decoded(hangindent)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(pycompat.sysstr(encoding.encoding))
459
489
# Lowercase spellings accepted by parsebool() and the value each stands for.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive. If s is not a valid boolean,
    returns None.
    """
    key = s.lower()
    return _booleans.get(key)
General Comments 0
You need to be logged in to leave comments. Login now