##// END OF EJS Templates
override vformat instead of _vformat...
Min RK -
Show More
@@ -1,782 +1,780 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import os
12 import os
13 import re
13 import re
14 import sys
14 import sys
15 import textwrap
15 import textwrap
16 from string import Formatter
16 from string import Formatter
17
17
18 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
18 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
19 from IPython.utils import py3compat
19 from IPython.utils import py3compat
20
20
# datetime.strftime date format for ipython: win32 gets the '%d' day
# directive, every other platform the '%-d' form.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
26
26
class LSString(str):
    """String subclass exposing shell-friendly views of its value.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects (requires path.py package)

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are handy for interacting with the shell, which typically
    only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the value split on newlines (cached after first use)."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the value with every newline replaced by a space (cached)."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n',' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself; no transformation is needed."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines that name existing paths (cached).

        Lines for which ``os.path.exists`` is false are left out.
        """
        # Imported here so path.py is only required when .p/.paths is used.
        from path import path
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                path(line) for line in self.split('\n') if os.path.exists(line)
            ]
        return found

    p = paths = property(get_paths)
75
75
76 # FIXME: We need to reimplement type specific displayhook and then add this
76 # FIXME: We need to reimplement type specific displayhook and then add this
77 # back as a custom printer. This should also be moved outside utils into the
77 # back as a custom printer. This should also be moved outside utils into the
78 # core.
78 # core.
79
79
80 # def print_lsstring(arg):
80 # def print_lsstring(arg):
81 # """ Prettier (non-repr-like) and more informative printer for LSString """
81 # """ Prettier (non-repr-like) and more informative printer for LSString """
82 # print "LSString (.p, .n, .l, .s available). Value:"
82 # print "LSString (.p, .n, .l, .s available). Value:"
83 # print arg
83 # print arg
84 #
84 #
85 #
85 #
86 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
86 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
87
87
88
88
class SList(list):
    """List derivative with a special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects (requires path.py package)

    Any values which require transformations are computed only once and
    cached."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that name existing paths (cached)."""
        # Imported here so path.py is only required when .p/.paths is used.
        from path import path
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively; a callable is used
        as the predicate as-is. If prune is true, return all items NOT
        matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Whole line, or the requested field ('' when the line is too
            # short to have that field).
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line that yields no field at all is
        left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the receiver is not reordered.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        With ``nums`` the sort key is the integer formed by the digit
        characters of the line (or of the selected field); keys without
        any digit count as 0.
        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                key = entry[0]
                if isinstance(key, list):
                    # fields() hands back a list of field strings; flatten it
                    # so digits are picked per character ('18K' -> 18), the
                    # same way they are for a whole line.
                    key = "".join(key)
                numstr = "".join([ch for ch in key if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])
234
234
235
235
236 # FIXME: We need to reimplement type specific displayhook and then add this
236 # FIXME: We need to reimplement type specific displayhook and then add this
237 # back as a custom printer. This should also be moved outside utils into the
237 # back as a custom printer. This should also be moved outside utils into the
238 # core.
238 # core.
239
239
240 # def print_slist(arg):
240 # def print_slist(arg):
241 # """ Prettier (non-repr-like) and more informative printer for SList """
241 # """ Prettier (non-repr-like) and more informative printer for SList """
242 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
242 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
243 # if hasattr(arg, 'hideonce') and arg.hideonce:
243 # if hasattr(arg, 'hideonce') and arg.hideonce:
244 # arg.hideonce = False
244 # arg.hideonce = False
245 # return
245 # return
246 #
246 #
247 # nlprint(arg) # This was a nested list printer, now removed.
247 # nlprint(arg) # This was a nested list printer, now removed.
248 #
248 #
249 # print_slist = result_display.when_type(SList)(print_slist)
249 # print_slist = result_display.when_type(SList)(print_slist)
250
250
251
251
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # '^' also matches after a final newline, which leaves a dangling indent
    # at the very end; drop it.  The regex only ever adds it after '\n', so
    # that is what is checked (not os.linesep), and an empty indent is
    # skipped because outstr[:-0] would wipe the whole string.
    if ind and outstr.endswith('\n'+ind):
        return outstr[:-len(ind)]
    else:
        return outstr
289
289
290
290
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back unchanged.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, py3compat.string_types) else arg
311
311
312
312
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str
        Text to center. When empty, a line made only of `mark` is returned,
        cut to `width` characters.
    width : int (default: 78)
        Target width of the result. Text too long for it is returned with a
        single space on each side and no marks.
    mark : str (default: '*')
        String repeated on both sides of the text. An empty mark gives the
        text padded with one space on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    if not mark:
        # Nothing to repeat; also avoids dividing by len(mark) == 0 below.
        return ' %s ' % txt
    # Marks per side: what is left after the text and its two spaces,
    # counted in whole marks and halved; never negative.
    nmark = max((width-len(txt)-2)//len(mark)//2, 0)
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)
336
336
337
337
# Leading run of whitespace (anything matched by \s) at the start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the length of the leading whitespace run in `strng`.

    Every character matched by ``\\s`` counts, not only spaces; the result
    is 0 when the string does not start with whitespace."""
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
348
348
349
349
def format_screen(strng):
    """Format a string for screen printing.

    Removes a backslash that sits at the end of any line (the latex-style
    paragraph-continue marker); the rest of the text is returned as is."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
358
358
359
359
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    Text that starts with a newline, or that has only one line, goes
    through textwrap.dedent whole.

    For use in wrap_paragraphs.
    """
    if text.startswith('\n'):
        # leading blank line: the first line needs no special treatment
        return textwrap.dedent(text)

    first, newline, rest = text.partition('\n')
    if not newline:
        # only one line
        return textwrap.dedent(text)

    # keep the first line as written, dedent everything after it
    return first + '\n' + textwrap.dedent(rest)
385
385
386
386
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)

    # The pattern has one capture group, so split() alternates paragraph,
    # separator, paragraph, ...; the even slots are the paragraphs.
    pieces = paragraph_re.split(dedent(text).strip())

    # Indentation that survives dedent is presumed to be meaningful
    # formatting, so only paragraphs whose lines are all flush get filled.
    return [
        textwrap.fill(para, ncols) if indent_re.search(para) is None else para
        for para in pieces[::2]
    ]
411
411
412
412
def long_substr(data):
    """Return the longest common substring in a list of strings.

    A single-item list returns that item; an empty list, or a list whose
    first string is empty, returns ''. Among equally long candidates the
    one found first while scanning the first string wins.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]

    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        reference = data[0]
        size = len(reference)
        # Try every slice of the first string; keep one only when it is
        # strictly longer than the best so far and occurs in every string.
        for start in range(size):
            for length in range(size - start + 1):
                candidate = reference[start:start+length]
                if length > len(substr) and all(candidate in x for x in data):
                    substr = candidate
    return substr
427
427
428
428
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the shared quote prefix removed from every line.
        When something is stripped the lines are re-joined with '\\n';
        otherwise the input is returned unchanged.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix:
            matches.add(prefix.group(1))
        else:
            # one unquoted line means nothing gets stripped anywhere
            break
    else:
        # Every line is quoted. Only what the quote prefixes share from
        # their first character on may be cut off: a common *substring*
        # is not necessarily the start of each line.
        prefix = os.path.commonprefix(list(matches))
        if prefix:
            strip = len(prefix)
            text = '\n'.join([ ln[strip:] for ln in lines])
    return text
475
475
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Strips the color/attribute sequences of the form ``ESC [ ... m`` whose
    parameters are digits and semicolons, including the parameterless
    reset ``ESC [ m``.

    Parameters
    ----------
    source : str
        Source to remove the ansi from

    Returns
    -------
    str : `source` without those sequences.
    """
    # '*' rather than '+': the parameter list may be empty.
    return re.sub(r'\033\[[\d;]*m', '', source)
486
486
487
487
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field text `name` as an expression over `kwargs`.

        Returns the ``(value, name)`` pair; `args` is not consulted.
        """
        # NOTE(review): eval() runs arbitrary expressions, so templates
        # must come from a trusted source.
        return eval(name, kwargs), name
512
512
513 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
513 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
514 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
514 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
515 # above, it should be possible to remove FullEvalFormatter.
515 # above, it should be possible to remove FullEvalFormatter.
516
516
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    # Modeled on Formatter._vformat, changed so that each field is eval'd
    # and whatever follows a ':' is treated as slicing, not as a format spec.
    def vformat(self, format_string, args, kwargs):
        """Render `format_string`, evaluating each field against `kwargs`.

        `args` is accepted for interface compatibility and is not used.
        Returns the pieces joined into one unicode string.
        """
        pieces = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # literal text goes out untouched
            if literal_text:
                pieces.append(literal_text)

            # a chunk without a field is just trailing literal text
            if field_name is None:
                continue

            if format_spec:
                # parse() split the field on ':'; glue it back so the
                # expression keeps its slice.
                field_name = ':'.join([field_name, format_spec])

            # NOTE(review): eval() runs arbitrary expressions, so templates
            # must come from a trusted source.
            obj = eval(field_name, kwargs)

            # apply any !r / !s conversion, then format with an empty spec
            obj = self.convert_field(obj, conversion)
            pieces.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in pieces)
574
572
575
573
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    ``$$foo`` is an escape and comes out as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # group 1: literal text before the '$'; group 2: the name (with a
    # leading '$' when the '$' was doubled).  DOTALL lets group 1 span
    # newlines: without it a match can start after a newline and the text
    # before it is never yielded.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)
    def parse(self, fmt_string):
        """Yield ``(literal_text, field_name, format_spec, conversion)``
        tuples like ``Formatter.parse``, with each ``$name`` found in the
        literal text turned into a field of its own.
        """
        for literal_txt, field_name, format_spec, conversion \
                    in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
616
614
617 #-----------------------------------------------------------------------------
615 #-----------------------------------------------------------------------------
618 # Utils to columnize a list of string
616 # Utils to columnize a list of string
619 #-----------------------------------------------------------------------------
617 #-----------------------------------------------------------------------------
620
618
def _col_chunks(l, max_rows, row_first=False):
    """Yield the columns obtained by laying `l` out on at most `max_rows` rows.

    With ``row_first`` false, each column is a consecutive slice of
    `max_rows` items. With ``row_first`` true, the items are dealt out across
    the columns in turn, so column ``k`` holds items ``k``, ``k + ncols``,
    ``k + 2 * ncols`` and so on.
    """
    total = len(l)
    if not row_first:
        for start in py3compat.xrange(0, total, max_rows):
            yield l[start:start + max_rows]
        return

    # Number of columns needed: total / max_rows, rounded up.
    ncols, leftover = divmod(total, max_rows)
    if leftover > 0:
        ncols += 1
    for col in py3compat.xrange(ncols):
        yield [l[idx] for idx in py3compat.xrange(col, total, ncols)]
630
628
631
629
632 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
630 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
633 """Calculate optimal info to columnize a list of string"""
631 """Calculate optimal info to columnize a list of string"""
634 for max_rows in range(1, len(rlist) + 1):
632 for max_rows in range(1, len(rlist) + 1):
635 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
633 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
636 sumlength = sum(col_widths)
634 sumlength = sum(col_widths)
637 ncols = len(col_widths)
635 ncols = len(col_widths)
638 if sumlength + separator_size * (ncols - 1) <= displaywidth:
636 if sumlength + separator_size * (ncols - 1) <= displaywidth:
639 break
637 break
640 return {'num_columns': ncols,
638 return {'num_columns': ncols,
641 'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,
639 'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,
642 'max_rows': max_rows,
640 'max_rows': max_rows,
643 'column_widths': col_widths
641 'column_widths': col_widths
644 }
642 }
645
643
646
644
647 def _get_or_default(mylist, i, default=None):
645 def _get_or_default(mylist, i, default=None):
648 """return list item number, or default if don't exist"""
646 """return list item number, or default if don't exist"""
649 if i >= len(mylist):
647 if i >= len(mylist):
650 return default
648 return default
651 else :
649 else :
652 return mylist[i]
650 return mylist[i]
653
651
654
652
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Any extra positional or keyword arguments (such as `separator_size` and
    `displaywidth`) are handed to `_find_optimal` unchanged.

    Returns
    -------

    strings_matrix

        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product of
        rows*columns, the missing cells are filled with `empty`.

    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l, displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
              ['b', 'g', 'l'],
              ['cc', 'h', None],
              ['d', 'i', None],
              ['eeeee', 'j', None]],
             {'num_columns': 3,
              'column_widths': [5, 1, 1],
              'optimal_separator_width': 2,
              'max_rows': 5})
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    # Map a (row, column) cell to its index in `items` for the chosen order.
    if row_first:
        def position(r, c):
            return r * ncol + c
    else:
        def position(r, c):
            return c * nrow + r

    matrix = []
    for r in range(nrow):
        matrix.append([_get_or_default(items, position(r, c), default=empty)
                       for c in range(ncol)])
    return (matrix, info)
718
716
719
717
def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        If true, the separator is right-padded with spaces up to the
        ``optimal_separator_width`` reported by `compute_item_matrix`.

    Returns
    -------
    The formatted string. Every row ends with a newline; an empty `items`
    gives a single newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    widths = info['column_widths']

    lines = []
    for row in matrix:
        # Drop only the None padding of incomplete rows. A plain truthiness
        # filter would also discard empty-string items and shift every later
        # cell of the row onto the wrong column width.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join([cell.ljust(width, ' ')
                                     for cell, width in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
750
748
751
749
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`. If `wrap_item_with` is non-empty it is put on both sides
    of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    entries = list_
    if wrap_item_with:
        entries = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                   for entry in list_]

    if len(entries) == 1:
        return entries[0]

    leading = sep.join(entries[:-1])
    return '%s%s%s' % (leading, last_sep, entries[-1])
General Comments 0
You need to be logged in to leave comments. Login now