##// END OF EJS Templates
fix compute_item_matrix doctest
Paul Ivanov -
Show More
@@ -1,783 +1,778 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import os
12 import os
13 import re
13 import re
14 import sys
14 import sys
15 import textwrap
15 import textwrap
16 from string import Formatter
16 from string import Formatter
17 try:
17 try:
18 from pathlib import Path
18 from pathlib import Path
19 except ImportError:
19 except ImportError:
20 # Python 2 backport
20 # Python 2 backport
21 from pathlib2 import Path
21 from pathlib2 import Path
22
22
23 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
23 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
24 from IPython.utils import py3compat
24 from IPython.utils import py3compat
25
25
# datetime.strftime date format for ipython.
# The '%-d' directive (day of month without zero padding) is used everywhere
# except on win32, where the plain '%d' directive is used instead.
if sys.platform == 'win32':
    date_format = "%B %d, %Y"
else:
    date_format = "%B %-d, %Y"
31
31
class LSString(str):
    """String derivative with special access attributes.

    These are normal strings, but with the special attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of ``Path`` objects, one per line that names
                        an existing filesystem entry.

    Any values which require transformations are computed only once and
    cached.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on newlines (computed once, then cached)."""
        try:
            return self.__list
        except AttributeError:
            self.__list = self.split('\n')
            return self.__list

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines replaced by spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = self.replace('\n', ' ')
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk (cached).

        Lines for which ``os.path.exists`` is false are silently skipped.
        """
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self.split('\n')
                            if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)
79
79
80 # FIXME: We need to reimplement type specific displayhook and then add this
80 # FIXME: We need to reimplement type specific displayhook and then add this
81 # back as a custom printer. This should also be moved outside utils into the
81 # back as a custom printer. This should also be moved outside utils into the
82 # core.
82 # core.
83
83
84 # def print_lsstring(arg):
84 # def print_lsstring(arg):
85 # """ Prettier (non-repr-like) and more informative printer for LSString """
85 # """ Prettier (non-repr-like) and more informative printer for LSString """
86 # print "LSString (.p, .n, .l, .s available). Value:"
86 # print "LSString (.p, .n, .l, .s available). Value:"
87 # print arg
87 # print arg
88 #
88 #
89 #
89 #
90 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
90 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
91
91
92
92
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of ``Path`` objects, one per item that names an
      existing filesystem entry.

    Any values which require transformations are computed only once and
    cached."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined on single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined on newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        This is case-insensitive. If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Whole line when no field is requested; otherwise the requested
            # whitespace-separated field, or "" when the line is too short.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            # Callables are used as the predicate directly.
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored.

        Without args, fields() just split()'s the strings (and returns a
        plain list of lists rather than an SList).
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            # Lines that yielded none of the requested fields are dropped.
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new sorted SList; unlike ``list.sort`` this does not sort
        in place.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                # Keep only the digits of the key; a key with no digits
                # sorts as 0.
                numstr = "".join([ch for ch in entry[0] if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])
237
237
238
238
239 # FIXME: We need to reimplement type specific displayhook and then add this
239 # FIXME: We need to reimplement type specific displayhook and then add this
240 # back as a custom printer. This should also be moved outside utils into the
240 # back as a custom printer. This should also be moved outside utils into the
241 # core.
241 # core.
242
242
243 # def print_slist(arg):
243 # def print_slist(arg):
244 # """ Prettier (non-repr-like) and more informative printer for SList """
244 # """ Prettier (non-repr-like) and more informative printer for SList """
245 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
245 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
246 # if hasattr(arg, 'hideonce') and arg.hideonce:
246 # if hasattr(arg, 'hideonce') and arg.hideonce:
247 # arg.hideonce = False
247 # arg.hideonce = False
248 # return
248 # return
249 #
249 #
250 # nlprint(arg) # This was a nested list printer, now removed.
250 # nlprint(arg) # This was a nested list printer, now removed.
251 #
251 #
252 # print_slist = result_display.when_type(SList)(print_slist)
252 # print_slist = result_display.when_type(SList)(print_slist)
253
253
254
254
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # '^' also matches after a trailing newline, which leaves a dangling
    # indent at the very end of the text; remove it.  The check uses '\n'
    # (which also covers '\r\n') because that is what '^' keys on, and it is
    # skipped for an empty indent, where a [:-0] slice would wipe the string.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    else:
        return outstr
292
292
293
293
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A single string is wrapped in a one-element list; any other argument is
    returned unchanged.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """

    if isinstance(arg, py3compat.string_types):
        return [arg]
    return arg
314
314
315
315
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str
        Text to center.  When empty, a full line of marks is returned.
    width : int
        Target width of the result.
    mark : str
        String repeated on both sides of the text.  An empty mark yields the
        text padded with a single space on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    if mark:
        nmark = (width-len(txt)-2)//len(mark)//2
    else:
        # An empty mark cannot fill any width; avoid dividing by zero.
        nmark = 0
    # Text wider than the marquee gets no marks at all.
    if nmark < 0:
        nmark = 0
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)
339
339
340
340
# Matches the run of whitespace at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of initial whitespace characters in a string.

    Any character matched by the regex class ``\\s`` is counted (tabs
    included), not only the space character.  Returns 0 when the string
    does not start with whitespace.
    """

    ini_spaces = ini_spaces_re.match(strng)
    if ini_spaces:
        return ini_spaces.end()
    else:
        return 0
351
351
352
352
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only a backslash
    at the end of a line (paragraph continuation) is dropped."""
    # Paragraph continue
    par_re = re.compile(r'\\$',re.MULTILINE)
    strng = par_re.sub('',strng)
    return strng
361
361
362
362
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
        is a bar
    '''

    For use in wrap_paragraphs.
    """

    if text.startswith('\n'):
        # text starts with blank line, don't ignore the first line
        return textwrap.dedent(text)

    # split first line
    splits = text.split('\n',1)
    if len(splits) == 1:
        # only one line
        return textwrap.dedent(text)

    first, rest = splits
    # dedent everything but the first line
    rest = textwrap.dedent(rest)
    return '\n'.join([first, rest])
388
388
389
389
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Parameters
    ----------
    text : str
        Text to wrap; it is dedented and stripped first.
    ncols : int (default: 80)
        Maximum line width passed to ``textwrap.fill``.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    text = dedent(text).strip()
    # The pattern has one capturing group, so split() alternates paragraph
    # text with the captured separator.
    paragraphs = paragraph_re.split(text)[::2] # every other entry is space
    out_ps = []
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    for p in paragraphs:
        # presume indentation that survives dedent is meaningful formatting,
        # so don't fill unless text is flush.
        if indent_re.search(p) is None:
            # wrap paragraph
            p = textwrap.fill(p, ncols)
        out_ps.append(p)
    return out_ps
414
414
415
415
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Returns '' for an empty list or when nothing is shared, and the string
    itself for a one-element list.  Candidates are taken from the first
    string; on ties the earliest one found wins.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        # Brute force: every slice data[0][i:i+j] longer than the current
        # best is checked for membership in all strings.
        for i in range(len(data[0])):
            for j in range(len(data[0])-i+1):
                if j > len(substr) and all(data[0][i:i+j] in x for x in data):
                    substr = data[0][i:i+j]
    elif len(data) == 1:
        substr = data[0]
    return substr
430
430
431
431
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the common quote prefix removed from every line, or
        the input unchanged when the lines share no quote prefix.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix:
            matches.add(prefix.group(1))
        else:
            # An unquoted line means nothing may be stripped.
            break
    else:
        # Only a shared *prefix* may be stripped: a merely common substring
        # (e.g. '> ' for the quote runs '> ' and ' > ') is not what every
        # line starts with, and slicing by its length would eat real content.
        prefix = os.path.commonprefix(list(matches))
        if prefix:
            strip = len(prefix)
            text = '\n'.join([ ln[strip:] for ln in lines])
    return text
478
478
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ``ESC [ <digits and semicolons> m`` are
    removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    return re.sub(r'\033\[(\d|;)+?m', '', source)
489
489
490
490
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field `name` as an expression in the `kwargs` namespace.

        Returns the ``(value, name)`` pair expected by ``Formatter``.
        """
        # NOTE(security): eval() runs arbitrary expressions taken from the
        # format string; only use this formatter with trusted templates.
        v = eval(name, kwargs)
        return v, name
515
515
516 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
516 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
517 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
517 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
518 # above, it should be possible to remove FullEvalFormatter.
518 # above, it should be possible to remove FullEvalFormatter.
519
519
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, evaluating each field in `kwargs`.

        `args` is accepted for interface compatibility but not used.
        Returns a unicode string.
        """
        result = []
        for literal_text, field_name, format_spec, conversion in \
                        self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split 'x[1:2]' at the colon, so glue the
                    # two halves back into a single expression.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # NOTE(security): eval() runs arbitrary expressions from the
                # format string; only use with trusted templates.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
575
575
576
576
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # Group 1: literal text before a '$'; group 2: the name that follows it
    # (word characters and dots), with a leading '$' when the dollar is
    # escaped as '$$'.  Raw string so the backslashes reach the regex engine
    # untouched; DOTALL so the leading text may span lines -- without it, any
    # lines preceding the one that holds the '$name' were skipped by the
    # search and lost from the output.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)
    def parse(self, fmt_string):
        """Yield ``(literal, field, format_spec, conversion)`` tuples.

        Extends ``Formatter.parse`` by also turning ``$name`` occurrences in
        the literal text into fields; ``$$name`` is emitted as the literal
        text ``$name``.
        """
        for literal_txt, field_name, format_spec, conversion \
                    in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
617
617
618 #-----------------------------------------------------------------------------
618 #-----------------------------------------------------------------------------
619 # Utils to columnize a list of string
619 # Utils to columnize a list of string
620 #-----------------------------------------------------------------------------
620 #-----------------------------------------------------------------------------
621
621
622 def _col_chunks(l, max_rows, row_first=False):
622 def _col_chunks(l, max_rows, row_first=False):
623 """Yield successive max_rows-sized column chunks from l."""
623 """Yield successive max_rows-sized column chunks from l."""
624 if row_first:
624 if row_first:
625 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
625 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
626 for i in py3compat.xrange(ncols):
626 for i in py3compat.xrange(ncols):
627 yield [l[j] for j in py3compat.xrange(i, len(l), ncols)]
627 yield [l[j] for j in py3compat.xrange(i, len(l), ncols)]
628 else:
628 else:
629 for i in py3compat.xrange(0, len(l), max_rows):
629 for i in py3compat.xrange(0, len(l), max_rows):
630 yield l[i:(i + max_rows)]
630 yield l[i:(i + max_rows)]
631
631
632
632
633 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
633 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
634 """Calculate optimal info to columnize a list of string"""
634 """Calculate optimal info to columnize a list of string"""
635 for max_rows in range(1, len(rlist) + 1):
635 for max_rows in range(1, len(rlist) + 1):
636 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
636 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
637 sumlength = sum(col_widths)
637 sumlength = sum(col_widths)
638 ncols = len(col_widths)
638 ncols = len(col_widths)
639 if sumlength + separator_size * (ncols - 1) <= displaywidth:
639 if sumlength + separator_size * (ncols - 1) <= displaywidth:
640 break
640 break
641 return {'num_columns': ncols,
641 return {'num_columns': ncols,
642 'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,
642 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
643 'max_rows': max_rows,
643 'max_rows': max_rows,
644 'column_widths': col_widths
644 'column_widths': col_widths
645 }
645 }
646
646
647
647
648 def _get_or_default(mylist, i, default=None):
648 def _get_or_default(mylist, i, default=None):
649 """return list item number, or default if don't exist"""
649 """return list item number, or default if don't exist"""
650 if i >= len(mylist):
650 if i >= len(mylist):
651 return default
651 return default
652 else :
652 else :
653 return mylist[i]
653 return mylist[i]
654
654
655
655
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Arrange `items` into a grid and return it with its layout info.

    Parameters
    ----------

    items
        list of strings to columnize
    row_first : (default False)
        Whether to fill the matrix row by row instead of column by column
        (default).
    empty : (default None)
        value used for the cells left over when the grid has more slots
        than there are items
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit.

    `separator_size` and `displaywidth` are not named in the signature; they
    are forwarded to `_find_optimal` through ``*args`` / ``**kwargs``.

    Returns
    -------

    strings_matrix

        nested list of strings: the outer list holds one list per row and
        each inner list holds one entry per column.  Cells whose index falls
        past the end of `items` contain `empty`.

    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: matrix, info = compute_item_matrix(l, displaywidth=12)
        In [3]: matrix
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    lengths = [len(item) for item in items]
    info = _find_optimal(lengths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    # Map a (row, column) cell to its position in the flat `items` list.
    if row_first:
        def flat_index(r, c):
            return r * ncol + c
    else:
        def flat_index(r, c):
            return c * nrow + r

    matrix = []
    for r in range(nrow):
        row = [_get_or_default(items, flat_index(r, c), default=empty)
               for c in range(ncol)]
        matrix.append(row)
    return (matrix, info)
719
714
720
715
def columnize(items, row_first=False, separator=' ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        If true, right-pad `separator` with spaces up to the
        ``optimal_separator_width`` reported by `compute_item_matrix`.

    Returns
    -------
    The formatted string, one line per row, always ending with a newline.
    An empty `items` gives a single newline.
    """
    # NOTE(review): the previous docstring stated the default separator is
    # two spaces while the signature shown here has one; the rendering this
    # was taken from may have collapsed whitespace -- confirm against the file.
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    widths = info['column_widths']
    lines = []
    for row in matrix:
        # Drop only the None padding added by compute_item_matrix.  Testing
        # truthiness here would also drop empty-string items and shift the
        # remaining cells of the row into the wrong columns.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join(cell.ljust(width, ' ')
                                    for cell, width in zip(cells, widths)))
    return '\n'.join(lines) + '\n'
751
746
752
747
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Join items into a natural-language enumeration.

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  When `wrap_item_with` is non-empty, it is placed on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''
    if wrap_item_with:
        list_ = [
            '%s%s%s' % (wrap_item_with, entry, wrap_item_with)
            for entry in list_
        ]
    if len(list_) == 1:
        return list_[0]
    leading = sep.join(list_[:-1])
    return '%s%s%s' % (leading, last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now