##// END OF EJS Templates
Row-first option for columnize
naught101 -
Show More
@@ -1,765 +1,780 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import os
12 import os
13 import re
13 import re
14 import sys
14 import sys
15 import textwrap
15 import textwrap
16 from string import Formatter
16 from string import Formatter
17
17
18 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
18 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
19 from IPython.utils import py3compat
19 from IPython.utils import py3compat
20
20
# strftime pattern IPython uses to render dates.  win32 gets the zero-padded
# day directive "%d"; every other platform gets "%-d" (presumably because the
# "-" flag is not available in the Windows C runtime -- confirm if it matters).
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
26
26
class LSString(str):
    """A ``str`` subclass exposing a few derived views as attributes.

    Instances behave like ordinary strings and additionally offer:

        .l (or .list) : the value as a list, split on newlines.
        .n (or .nlstr): the original value (the string itself).
        .s (or .spstr): the value with every newline replaced by a space.
        .p (or .paths): list of path objects for the lines that name an
                        existing filesystem entry (requires the path.py
                        package).

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are handy when talking to the shell, which typically only
    understands whitespace-separated options for commands.
    """

    def get_list(self):
        """Return the string split on newlines (cached after first use)."""
        if not hasattr(self, '_LSString__list'):
            self.__list = self.split('\n')
        return self.__list

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces (cached)."""
        if not hasattr(self, '_LSString__spstr'):
            self.__spstr = self.replace('\n', ' ')
        return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself, unchanged."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for every line that exists on disk (cached).

        The ``path`` import happens on every access, before the cache is
        consulted, so a missing path.py package always raises ImportError.
        """
        from path import path
        if not hasattr(self, '_LSString__paths'):
            existing = [ln for ln in self.split('\n') if os.path.exists(ln)]
            self.__paths = [path(ln) for ln in existing]
        return self.__paths

    p = paths = property(get_paths)
75
75
76 # FIXME: We need to reimplement type specific displayhook and then add this
76 # FIXME: We need to reimplement type specific displayhook and then add this
77 # back as a custom printer. This should also be moved outside utils into the
77 # back as a custom printer. This should also be moved outside utils into the
78 # core.
78 # core.
79
79
80 # def print_lsstring(arg):
80 # def print_lsstring(arg):
81 # """ Prettier (non-repr-like) and more informative printer for LSString """
81 # """ Prettier (non-repr-like) and more informative printer for LSString """
82 # print "LSString (.p, .n, .l, .s available). Value:"
82 # print "LSString (.p, .n, .l, .s available). Value:"
83 # print arg
83 # print arg
84 #
84 #
85 #
85 #
86 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
86 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
87
87
88
88
class SList(list):
    """List derivative with a special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects (requires path.py package)

    Any values which require transformations are computed only once and
    cached.  The cache is not invalidated if the list is mutated afterwards.
    """

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for the items that exist on disk (cached)."""
        from path import path
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is matched with ``re.search`` and is
        case-insensitive.  A callable is used directly as the predicate.
        A precompiled regular expression is also accepted and is searched
        with whatever flags it was compiled with.

        If prune is true, return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field; items lacking that field are matched
        against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if callable(pattern):
            pred = pattern
        elif hasattr(pattern, 'search'):
            # Already-compiled regex: its flags cannot be changed, use as is.
            pred = pattern.search
        else:
            # Compile once instead of re-resolving the pattern per element.
            pred = re.compile(pattern, re.IGNORECASE).search

        if prune:
            return SList([el for el in self if not pred(match_target(el))])
        return SList([el for el in self if pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line that yields no field at all is left
        out of the result.

        Without args, fields() just split()'s the strings and returns a plain
        list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the receiver is not modified.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        With ``nums`` true the sort key is the integer formed by the digit
        characters of the field (or of the whole line when no field is
        given); keys without any digit count as 0.
        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                key = entry[0]
                # With a field the key is a (possibly empty) list holding the
                # field text; flatten it so digits are picked per character,
                # exactly as in the whole-line case.
                key_text = key if field is None else " ".join(key)
                numstr = "".join([ch for ch in key_text if ch.isdigit()])
                try:
                    entry[0] = int(numstr)
                except ValueError:
                    entry[0] = 0

        dsu.sort()
        return SList([t[1] for t in dsu])
234
234
235
235
236 # FIXME: We need to reimplement type specific displayhook and then add this
236 # FIXME: We need to reimplement type specific displayhook and then add this
237 # back as a custom printer. This should also be moved outside utils into the
237 # back as a custom printer. This should also be moved outside utils into the
238 # core.
238 # core.
239
239
240 # def print_slist(arg):
240 # def print_slist(arg):
241 # """ Prettier (non-repr-like) and more informative printer for SList """
241 # """ Prettier (non-repr-like) and more informative printer for SList """
242 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
242 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
243 # if hasattr(arg, 'hideonce') and arg.hideonce:
243 # if hasattr(arg, 'hideonce') and arg.hideonce:
244 # arg.hideonce = False
244 # arg.hideonce = False
245 # return
245 # return
246 #
246 #
247 # nlprint(arg) # This was a nested list printer, now removed.
247 # nlprint(arg) # This was a nested list printer, now removed.
248 #
248 #
249 # print_slist = result_display.when_type(SList)(print_slist)
249 # print_slist = result_display.when_type(SList)(print_slist)
250
250
251
251
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
    `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # '^' also matches right after a final newline, which leaves a dangling
    # indent at the very end of the text; remove it.  The check is skipped for
    # an empty indent because outstr[:-0] would wipe the whole string, and it
    # accepts '\n' as well as os.linesep since in-memory text normally uses
    # '\n' on every platform.
    if ind and outstr.endswith((os.linesep + ind, '\n' + ind)):
        return outstr[:-len(ind)]
    return outstr
289
289
290
290
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back untouched.

    The intended inputs are a string or a list of strings, so that callers
    can always iterate over strings.  Non-string arguments are returned as
    they are, without being copied or converted.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    if not isinstance(arg, py3compat.string_types):
        return arg
    return [arg]
311
311
312
312
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str
        Text to center.  When empty, the result is just `mark` repeated and
        cut to `width` characters.
    width : int (default: 78)
        Target total width.  Text too long to fit is returned with a single
        space on each side and no marks.
    mark : str (default: '*')
        String repeated on both sides of the text.  An empty mark yields the
        text padded with one space on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    if mark:
        # Two characters are reserved for the spaces around the text; the rest
        # is split evenly between both sides.
        nmark = max((width-len(txt)-2)//len(mark)//2, 0)
    else:
        # An empty mark would otherwise divide by zero.
        nmark = 0
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)
336
336
337
337
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the length of the leading whitespace run of `strng`.

    Any whitespace character counts (tabs and newlines included), not only
    the space character.  A string that does not start with whitespace
    gives 0.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
348
348
349
349
def format_screen(strng):
    """Prepare a string for display on screen.

    The only transformation applied is the removal of a backslash sitting at
    the end of a line (a latex-style paragraph-continuation marker).
    Backslashes anywhere else are left alone.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
358
358
359
359
def dedent(text):
    """Like textwrap.dedent, but tolerant of an unindented first line.

    A first line that carries text is kept as it is and only the remainder is
    dedented, so strings such as:
    '''foo
    is a bar
    '''
    still lose the indentation of their continuation lines.  Text that is a
    single line, or that begins with a newline, is passed to textwrap.dedent
    as a whole.

    For use in wrap_paragraphs.
    """
    head, newline, tail = text.partition('\n')
    if not newline or not head:
        # one-liner, or the text opens with a blank line: nothing to set aside
        return textwrap.dedent(text)
    return head + '\n' + textwrap.dedent(tail)
385
385
386
386
def wrap_paragraphs(text, ncols=80):
    """Wrap each paragraph of `text` to at most `ncols` columns.

    Works like textwrap.wrap, except that the input may hold several
    paragraphs separated by empty lines.  The text is dedented and stripped
    first.  A paragraph that still contains an indented line after that is
    assumed to be deliberately formatted and is passed through unwrapped.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_sep = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_line = re.compile(r'\n\s+', re.MULTILINE)
    # The separator pattern has one capturing group, so split() alternates
    # paragraph, captured separator, paragraph, ...; keep the even entries.
    pieces = paragraph_sep.split(dedent(text).strip())[::2]
    return [
        para if indented_line.search(para) else textwrap.fill(para, ncols)
        for para in pieces
    ]
411
411
412
412
def long_substr(data):
    """Return the longest substring shared by every string in `data`.

    Candidates are taken from the first string; among equally long common
    substrings the one found first (leftmost in the first string) wins.
    An empty `data` gives '', and a single-element `data` gives that element.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 0:
        return ''
    if len(data) == 1:
        return data[0]

    reference = data[0]
    best = ''
    for start in range(len(reference)):
        # Only candidates strictly longer than the current best can improve it.
        for length in range(len(best) + 1, len(reference) - start + 1):
            candidate = reference[start:start + length]
            if all(candidate in item for item in data):
                best = candidate
    return best
427
427
428
428
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Each line's quote prefix is matched, and the longest prefix common to all
    of them is removed from every line.  If any line has no quote prefix, or
    the prefixes share no common start, the text is returned unchanged.  When
    something is stripped, the result is re-joined with '\\n', so a trailing
    newline or other line-ending style is not preserved.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefixes = set()
    for line in lines:
        quoted = re.match(r'^(\s*>[ >]*)', line)
        if quoted is None:
            # One unquoted line means there is nothing common to remove.
            return text
        prefixes.add(quoted.group(1))

    # Use a true common *prefix*: a longest common substring may sit in the
    # middle of some quote prefixes, and slicing by its length would then cut
    # characters that are not shared by every line.
    common = os.path.commonprefix(list(prefixes))
    if common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
475
475
def strip_ansi(source):
    """
    Return `source` with ansi escape codes removed.

    Only sequences of the form ESC '[' <digits and/or ';'> 'm' are deleted;
    everything else is left in place.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    ansi_code = re.compile(r'\033\[(\d|;)+?m')
    return ansi_code.sub('', source)
486
486
487
487
class EvalFormatter(Formatter):
    """A String Formatter whose fields are evaluated as simple expressions.

    The text of each replacement field is evaluated with the keyword
    arguments of ``format`` as its namespace.  A ``:`` still introduces a
    format specification (as in standard string formatting), so slicing has
    to be spelled with an explicit ``slice(...)``.

    Intended for templating, such as the parallel batch script templates,
    where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # NOTE: the field text goes through eval(); only format trusted
        # template strings with this class.
        return eval(name, kwargs), name
512
512
513 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
513 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
514 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
514 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
515 # above, it should be possible to remove FullEvalFormatter.
515 # above, it should be possible to remove FullEvalFormatter.
516
516
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as an expression in the kwargs
    namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, evaluating each field against `kwargs`.

        Overridden because the ``_vformat`` below returns a plain string,
        while the base ``vformat`` of newer Python 3 releases expects a
        ``(text, auto_arg_index)`` tuple from it and would fail to unpack
        the result.
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Return the formatted text as a unicode string.

        Raises ValueError when `recursion_depth` is negative.  `args` and
        `used_args` are accepted for signature compatibility but not used.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split the field on its first ':', so glue
                    # the two halves back into a single expression
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # NOTE: eval() on template text -- only use with trusted
                # format strings.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
574
574
575
575
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # Raw string so the backslashes reach the regex engine untouched, and
    # DOTALL so the lazy text group may span newlines: without it a match
    # could only begin on the line holding the '$', and the literal text of
    # earlier lines was skipped and lost.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)

    def parse(self, fmt_string):
        """Yield (literal_text, field_name, format_spec, conversion) tuples.

        Wraps ``Formatter.parse``: each ``$name`` found in a literal chunk is
        emitted as an extra field (with an empty format spec and no
        conversion) ahead of the ``{}``-style field that follows the chunk.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
616
616
617 #-----------------------------------------------------------------------------
617 #-----------------------------------------------------------------------------
618 # Utils to columnize a list of string
618 # Utils to columnize a list of string
619 #-----------------------------------------------------------------------------
619 #-----------------------------------------------------------------------------
620
620
621 def _chunks(l, n):
621 def _col_chunks(l, nrows, row_first=False):
622 """Yield successive n-sized chunks from l."""
622 """Yield successive nrows-sized column chunks from l."""
623 for i in py3compat.xrange(0, len(l), n):
623 if row_first:
624 yield l[i:i+n]
624 ncols = (len(l) // nrows) + (len(l) % nrows > 0)
625 for i in py3compat.xrange(ncols):
626 yield [l[j] for j in py3compat.xrange(i, len(l), nrows)]
627 else:
628 for i in py3compat.xrange(0, len(l), nrows):
629 yield l[i:(i + nrows)]
625
630
626
631
627 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
632 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
628 """Calculate optimal info to columnize a list of strings"""
633 """Calculate optimal info to columnize a list of strings"""
629 for nrow in range(1, len(rlist)+1) :
634 for nrow in range(1, len(rlist) + 1):
630 chk = list(map(max,_chunks(rlist, nrow)))
635 col_widths = list(map(max, _col_chunks(rlist, nrow, row_first)))
631 sumlength = sum(chk)
636 sumlength = sum(col_widths)
632 ncols = len(chk)
637 ncols = len(col_widths)
633 if sumlength+separator_size*(ncols-1) <= displaywidth :
638 if sumlength + separator_size * (ncols - 1) <= displaywidth:
634 break;
639 break
635 return {'columns_numbers' : ncols,
640 return {'num_columns': ncols,
636 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
641 'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,
637 'rows_numbers' : nrow,
642 'num_rows': nrow,
638 'columns_width' : chk
643 'column_widths': col_widths
639 }
644 }
640
645
641
646
642 def _get_or_default(mylist, i, default=None):
647 def _get_or_default(mylist, i, default=None):
643 """Return the list item at index i, or default if it doesn't exist"""
648 """Return the list item at index i, or default if it doesn't exist"""
644 if i >= len(mylist):
649 if i >= len(mylist):
645 return default
650 return default
646 else :
651 else :
647 return mylist[i]
652 return mylist[i]
648
653
649
654
650 def compute_item_matrix(items, empty=None, *args, **kwargs) :
655 def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
651 """Returns a nested list, and info to columnize items
656 """Returns a nested list, and info to columnize items
652
657
653 Parameters
658 Parameters
654 ----------
659 ----------
655
660
656 items
661 items
657 list of strings to columize
662 list of strings to columize
663 row_first : (default False)
664 Whether to compute columns for a row-first matrix instead of
665 column-first (default).
658 empty : (default None)
666 empty : (default None)
659 default value to fill list if needed
667 default value to fill list if needed
660 separator_size : int (default=2)
668 separator_size : int (default=2)
661 How many characters will be used as a separation between columns.
669 How many characters will be used as a separation between columns.
662 displaywidth : int (default=80)
670 displaywidth : int (default=80)
663 The width of the area into which the columns should fit
671 The width of the area into which the columns should fit
664
672
665 Returns
673 Returns
666 -------
674 -------
667
675
668 strings_matrix
676 strings_matrix
669
677
670 nested list of strings; the outermost list contains as many lists as
678 nested list of strings; the outermost list contains as many lists as
671 rows, and each inner list has as many elements as columns. If the
679 rows, and each inner list has as many elements as columns. If the
672 total number of elements in `items` does not equal the product of
680 total number of elements in `items` does not equal the product of
673 rows*columns, the last elements of some lists are filled with `None`.
681 rows*columns, the last elements of some lists are filled with `None`.
674
682
675 dict_info
683 dict_info
676 some info to make columnize easier:
684 some info to make columnize easier:
677
685
678 columns_numbers
686 num_columns
679 number of columns
687 number of columns
680 rows_numbers
688 num_rows
681 number of rows
689 number of rows
682 columns_width
690 column_widths
683 list of the width of each column
691 list of the width of each column
684 optimal_separator_width
692 optimal_separator_width
685 best separator width between columns
693 best separator width between columns
686
694
687 Examples
695 Examples
688 --------
696 --------
689 ::
697 ::
690
698
691 In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
699 In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
692 ...: compute_item_matrix(l,displaywidth=12)
700 ...: compute_item_matrix(l, displaywidth=12)
693 Out[1]:
701 Out[1]:
694 ([['aaa', 'f', 'k'],
702 ([['aaa', 'f', 'k'],
695 ['b', 'g', 'l'],
703 ['b', 'g', 'l'],
696 ['cc', 'h', None],
704 ['cc', 'h', None],
697 ['d', 'i', None],
705 ['d', 'i', None],
698 ['eeeee', 'j', None]],
706 ['eeeee', 'j', None]],
699 {'columns_numbers': 3,
707 {'num_columns': 3,
700 'columns_width': [5, 1, 1],
708 'column_widths': [5, 1, 1],
701 'optimal_separator_width': 2,
709 'optimal_separator_width': 2,
702 'rows_numbers': 5})
710 'num_rows': 5})
703 """
711 """
704 info = _find_optimal(list(map(len, items)), *args, **kwargs)
712 info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
705 nrow, ncol = info['rows_numbers'], info['columns_numbers']
713 nrow, ncol = info['num_rows'], info['num_columns']
706 return ([[ _get_or_default(items, c*nrow+i, default=empty) for c in range(ncol) ] for i in range(nrow) ], info)
714 if row_first:
715 return ([[_get_or_default(items, c * nrow + r, default=empty) for r in range(nrow)] for c in range(ncol)], info)
716 else:
717 return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)
707
718
708
719
709 def columnize(items, separator=' ', displaywidth=80):
720 def columnize(items, row_first=False, separator=' ', displaywidth=80):
710 """ Transform a list of strings into a single string with columns.
721 """ Transform a list of strings into a single string with columns.
711
722
712 Parameters
723 Parameters
713 ----------
724 ----------
714 items : sequence of strings
725 items : sequence of strings
715 The strings to process.
726 The strings to process.
716
727
728 row_first : (default False)
729 Whether to compute columns for a row-first matrix instead of
730 column-first (default).
731
717 separator : str, optional [default is two spaces]
732 separator : str, optional [default is two spaces]
718 The string that separates columns.
733 The string that separates columns.
719
734
720 displaywidth : int, optional [default is 80]
735 displaywidth : int, optional [default is 80]
721 Width of the display in number of characters.
736 Width of the display in number of characters.
722
737
723 Returns
738 Returns
724 -------
739 -------
725 The formatted string.
740 The formatted string.
726 """
741 """
727 if not items :
742 if not items:
728 return '\n'
743 return '\n'
729 matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
744 matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
730 fmatrix = [filter(None, x) for x in matrix]
745 fmatrix = [filter(None, x) for x in matrix]
731 sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['columns_width'])])
746 sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
732 return '\n'.join(map(sjoin, fmatrix))+'\n'
747 return '\n'.join(map(sjoin, fmatrix))+'\n'
733
748
734
749
735 def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
750 def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
736 """
751 """
737 Return a string with a natural enumeration of items
752 Return a string with a natural enumeration of items
738
753
739 >>> get_text_list(['a', 'b', 'c', 'd'])
754 >>> get_text_list(['a', 'b', 'c', 'd'])
740 'a, b, c and d'
755 'a, b, c and d'
741 >>> get_text_list(['a', 'b', 'c'], ' or ')
756 >>> get_text_list(['a', 'b', 'c'], ' or ')
742 'a, b or c'
757 'a, b or c'
743 >>> get_text_list(['a', 'b', 'c'], ', ')
758 >>> get_text_list(['a', 'b', 'c'], ', ')
744 'a, b, c'
759 'a, b, c'
745 >>> get_text_list(['a', 'b'], ' or ')
760 >>> get_text_list(['a', 'b'], ' or ')
746 'a or b'
761 'a or b'
747 >>> get_text_list(['a'])
762 >>> get_text_list(['a'])
748 'a'
763 'a'
749 >>> get_text_list([])
764 >>> get_text_list([])
750 ''
765 ''
751 >>> get_text_list(['a', 'b'], wrap_item_with="`")
766 >>> get_text_list(['a', 'b'], wrap_item_with="`")
752 '`a` and `b`'
767 '`a` and `b`'
753 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
768 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
754 'a + b + c = d'
769 'a + b + c = d'
755 """
770 """
756 if len(list_) == 0:
771 if len(list_) == 0:
757 return ''
772 return ''
758 if wrap_item_with:
773 if wrap_item_with:
759 list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for
774 list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for
760 item in list_]
775 item in list_]
761 if len(list_) == 1:
776 if len(list_) == 1:
762 return list_[0]
777 return list_[0]
763 return '%s%s%s' % (
778 return '%s%s%s' % (
764 sep.join(i for i in list_[:-1]),
779 sep.join(i for i in list_[:-1]),
765 last_sep, list_[-1])
780 last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now