##// END OF EJS Templates
stringutil: add isauthorwellformed function...
Connor Sheehan -
r37172:f8e1f48d default
parent child Browse files
Show More
@@ -1,288 +1,311 b''
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import codecs
12 import codecs
13 import re as remod
13 import re as remod
14 import textwrap
14 import textwrap
15
15
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .. import (
18 from .. import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 )
22 )
23
23
# Lookup table used by escapedata(): every byte value maps to its \xNN form
# by default; backslash, CR and LF get their conventional short escapes.
_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
_DATA_ESCAPE_MAP[b'\\'] = b'\\\\'
_DATA_ESCAPE_MAP[b'\r'] = br'\r'
_DATA_ESCAPE_MAP[b'\n'] = br'\n'
# Bytes that get escaped: control characters except tab (0x09), the
# backslash itself, and everything from 0x7f upwards.
_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
31
31
def escapedata(s):
    """Return ``s`` with every byte matched by _DATA_ESCAPE_RE replaced by
    its entry in _DATA_ESCAPE_MAP.

    A ``bytearray`` argument is converted to ``bytes`` first.
    """
    data = bytes(s) if isinstance(s, bytearray) else s

    def _escape(found):
        return _DATA_ESCAPE_MAP[found.group(0)]

    return _DATA_ESCAPE_RE.sub(_escape, data)
37
37
def binary(s):
    """Tell whether ``s`` looks like binary data.

    Returns True when ``s`` is non-empty and contains a NUL character,
    False otherwise (including for empty or None input).
    """
    if not s:
        return False
    return '\0' in s
41
41
def stringmatcher(pattern, casesensitive=True):
    """
    Build a matcher from a pattern with an optional 're:' or 'literal:'
    prefix.

    Returns a ``(kind, pattern, matcher)`` tuple where ``kind`` is 're' or
    'literal', ``pattern`` is the input with its recognized prefix removed,
    and ``matcher`` is a callable taking the string to test.  A missing or
    unknown prefix means a literal (exact) match.  An invalid regular
    expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        regexsrc = pattern[3:]
        reflags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(regexsrc, reflags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        # regex patterns match anywhere in the tested string
        return 're', regexsrc, compiled.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # case-insensitive literal: compare the lower-cased forms
    folded = encoding.lower(pattern)

    def foldedmatch(s):
        return folded == encoding.lower(s)

    return 'literal', pattern, foldedmatch
100
100
def shortuser(user):
    """Return a short representation of a user name or email address.

    Successively keeps: the part before the first '@', then the part after
    the first '<', then the part before the first space, then the part
    before the first '.'.  A step whose delimiter is absent is a no-op.
    """
    user = user.split('@', 1)[0]
    user = user.split('<', 1)[-1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
116
116
def emailuser(user):
    """Return the user portion of an email address.

    Drops everything from the first '@' onwards, then everything up to and
    including the first '<' of what remains.
    """
    beforeat = user.split('@', 1)[0]
    return beforeat.split('<', 1)[-1]
126
126
def email(author):
    '''Get the email part of an author string.

    Returns the text between the first '<' and the first '>'.  Without a
    '<' the result starts at the beginning of the string; without a '>' it
    runs to the end.
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end < 0:
        return author[start:]
    return author[start:end]
133
133
# "Name <local@domain>": a non-empty name without '<', one whitespace
# character, then an address in angle brackets containing an '@'.
# The pattern is anchored with \Z rather than $ because $ also matches just
# before a trailing newline, which would let b'Name <a@b>\n' through.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>\Z')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    ``author`` is a byte string.  The whole string must match; anything
    after the closing '>' (including a trailing newline) makes it
    malformed.

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    return _correctauthorformat.match(author) is not None
156
def ellipsis(text, maxlength=400):
    """Shorten ``text`` to at most ``maxlength`` display columns (400 by
    default), delegating to encoding.trim with '...' as the ellipsis."""
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
137
160
def escapestr(s):
    """Return ``s`` with special characters backslash-escaped."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, unusedlen = codecs.escape_encode(s)
    return escaped
142
165
def unescapestr(s):
    """Return ``s`` with backslash escape sequences decoded (the inverse of
    escapestr)."""
    decoded, unusedlen = codecs.escape_decode(s)
    return decoded
145
168
def forcebytestr(obj):
    """Portably turn an arbitrary object (e.g. an exception) into a byte
    string.

    pycompat.bytestr() is tried first; if that raises UnicodeEncodeError
    the object is converted via str() and encoding.strtolocal() instead.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localized)
    return result
154
177
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed
    into single ones."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
158
181
# delay import of textwrap: the TextWrapper subclass is only created the
# first time this factory is called.
# NOTE(review): presumably this keeps a lazily-loaded textwrap module from
# being touched at import time -- confirm against the import machinery.
def _MBTextWrapper(**kwargs):
    """Create and return a width-aware TextWrapper built from ``kwargs``.

    On the first call the subclass ``tw`` is defined and the module-level
    name ``_MBTextWrapper`` is rebound to it, so later calls instantiate the
    class directly instead of running this function again.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each character.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` into ``(head, tail)`` so that ``head`` is the
            longest prefix whose accumulated column width does not exceed
            ``space_left``; ``tail`` is '' when everything fits."""
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk that is wider than a whole line.

            With break_long_words set, the chunk is cut by column width and
            the remainder stays on the chunk stack; otherwise the chunk is
            moved whole onto the current line, but only if that line is
            still empty.
            """
            # always leave room for at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Assemble ``chunks`` into a list of output lines, measuring
            chunks with encoding.ucolwidth.

            ``chunks`` is reversed in place and consumed.  Raises ValueError
            when self.width is not positive.
            """
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                # (the indent is measured with len(), not colwidth())
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class itself so the class body above is
    # only ever evaluated once.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
262
285
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` and return the filled text.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones.  All three strings are decoded with encoding.encoding /
    encoding.encodingmode before wrapping and the result is encoded back
    with encoding.encoding.
    """
    widestindent = max(len(hangindent), len(initindent))
    if width <= widestindent:
        # adjust for weird terminal size
        width = max(78, widestindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errmode)
    initindent = initindent.decode(codec, errmode)
    hangindent = hangindent.decode(codec, errmode)

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(codec)
278
301
# Spellings accepted by parsebool(), keyed by their lower-case form.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The lookup is case-insensitive.  If s is not a valid boolean, returns
    None.
    """
    key = s.lower()
    return _booleans.get(key)
General Comments 0
You need to be logged in to leave comments. Login now