##// END OF EJS Templates
Backport PR #8985: override vformat instead of _vformat...
Min RK -
Show More
@@ -1,784 +1,782 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import os
22 import os
23 import re
23 import re
24 import sys
24 import sys
25 import textwrap
25 import textwrap
26 from string import Formatter
26 from string import Formatter
27
27
28 from IPython.external.path import path
28 from IPython.external.path import path
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
30 from IPython.utils import py3compat
30 from IPython.utils import py3compat
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Declarations
33 # Declarations
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
# datetime.strftime date format for ipython: win32 uses the '%d' day
# directive, every other platform uses '%-d'.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
41
41
42
42
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44 # Code
44 # Code
45 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
46
46
class LSString(str):
    """String derivative with special access attributes.

    Instances behave like ordinary strings, and additionally expose:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects

    Every derived value is computed on first access and then cached on the
    instance.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on newlines (cached after first call)."""
        # The cache attribute is name-mangled by the class body, hence the
        # explicit '_LSString__list' spelling in the hasattr() check.
        if not hasattr(self, '_LSString__list'):
            self.__list = self.split('\n')
        return self.__list

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines replaced by spaces (cached)."""
        if not hasattr(self, '_LSString__spstr'):
            self.__spstr = self.replace('\n',' ')
        return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself, unchanged."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines that name existing paths (cached)."""
        if not hasattr(self, '_LSString__paths'):
            existing = [p for p in self.split('\n') if os.path.exists(p)]
            self.__paths = [path(p) for p in existing]
        return self.__paths

    p = paths = property(get_paths)
94
94
95 # FIXME: We need to reimplement type specific displayhook and then add this
95 # FIXME: We need to reimplement type specific displayhook and then add this
96 # back as a custom printer. This should also be moved outside utils into the
96 # back as a custom printer. This should also be moved outside utils into the
97 # core.
97 # core.
98
98
99 # def print_lsstring(arg):
99 # def print_lsstring(arg):
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
101 # print "LSString (.p, .n, .l, .s available). Value:"
101 # print "LSString (.p, .n, .l, .s available). Value:"
102 # print arg
102 # print arg
103 #
103 #
104 #
104 #
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
106
106
107
107
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects

    Any values which require transformations are computed only once and
    cached."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that name existing paths (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        String patterns are searched case-insensitively. If prune is true,
        return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Whole line unless a field index was requested; a line that
            # lacks the requested field is matched against "".
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored.

        Without args, fields() just split()'s the strings.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    # a line without this field simply contributes nothing
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Unlike ``list.sort`` this does not sort in place: a new, sorted
        SList is returned.

        Parameters
        ----------
        field : int, optional
            Index of the whitespace-separated field to sort on.  When omitted
            the whole line is the sort key.
        nums : bool (default: False)
            If True, the digits found in the key are read as one integer and
            used as the sort key (0 when the key holds no digits).

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                key = entry[0]
                # With a field, the key is the SList returned by fields();
                # flatten it to text so digits are picked character by
                # character, exactly as for whole-line keys.  Iterating the
                # list directly would test each whole field with isdigit()
                # and discard values such as '21k'.
                if isinstance(key, list):
                    key = "".join(key)
                numstr = "".join([ch for ch in key if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])
252
252
253
253
254 # FIXME: We need to reimplement type specific displayhook and then add this
254 # FIXME: We need to reimplement type specific displayhook and then add this
255 # back as a custom printer. This should also be moved outside utils into the
255 # back as a custom printer. This should also be moved outside utils into the
256 # core.
256 # core.
257
257
258 # def print_slist(arg):
258 # def print_slist(arg):
259 # """ Prettier (non-repr-like) and more informative printer for SList """
259 # """ Prettier (non-repr-like) and more informative printer for SList """
260 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
261 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 # if hasattr(arg, 'hideonce') and arg.hideonce:
262 # arg.hideonce = False
262 # arg.hideonce = False
263 # return
263 # return
264 #
264 #
265 # nlprint(arg) # This was a nested list printer, now removed.
265 # nlprint(arg) # This was a nested list printer, now removed.
266 #
266 #
267 # print_slist = result_display.when_type(SList)(print_slist)
267 # print_slist = result_display.when_type(SList)(print_slist)
268
268
269
269
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches after a trailing newline, which
    # leaves a dangling indent at the very end of the text; drop it.
    # Checking for '\n' (rather than os.linesep) covers both '\n' and '\r\n'
    # line endings on every platform, and the `ind` guard avoids the
    # outstr[:-0] == '' trap when the indent is empty.
    if ind and outstr.endswith('\n'+ind):
        return outstr[:-len(ind)]
    return outstr
307
307
308
308
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back untouched.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    is_single_string = isinstance(arg, py3compat.string_types)
    return [arg] if is_single_string else arg
329
329
330
330
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str (default: '')
        Text to center.  When empty, a full line of marks is returned.
    width : int (default: 78)
        Target width of the result.  Text too long to fit is returned with
        only its surrounding spaces.
    mark : str (default: '*')
        String repeated on both sides of the text.  An empty mark yields the
        text surrounded by single spaces.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    if not mark:
        # nothing to repeat; also avoids dividing by len(mark) == 0 below
        return ' %s ' % txt
    # two columns are reserved for the spaces around the text
    nmark = max((width-len(txt)-2)//len(mark)//2, 0)
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)
354
354
355
355
# Matches a non-empty run of whitespace at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string."""
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
366
366
367
367
def format_screen(strng):
    """Format a string for screen printing.

    Strips the backslash that ends a line (the latex-style "paragraph
    continue" marker); everything else is left as is."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
376
376
377
377
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Two cases are handed to textwrap.dedent unchanged: text that opens with
    # a blank line (first is empty) and text consisting of a single line
    # (no newline found).
    if not first or not newline:
        return textwrap.dedent(text)

    # Otherwise keep the first line verbatim and dedent only what follows.
    return '\n'.join([first, textwrap.dedent(rest)])
403
403
404
404
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)

    cleaned = dedent(text).strip()
    # split() interleaves the captured separators with the paragraphs, so
    # only the even-indexed entries are actual paragraph text.
    paragraphs = paragraph_re.split(cleaned)[::2]

    def _fill_if_flush(para):
        # Indentation that survives dedent is presumed to be meaningful
        # formatting, so only flush paragraphs are re-filled.
        if indent_re.search(para) is None:
            return textwrap.fill(para, ncols)
        return para

    return [_fill_if_flush(para) for para in paragraphs]
429
429
430
430
def long_substr(data):
    """Return the longest common substring in a list of strings.

    A single-element list yields that element; an empty list, or a list
    whose first string is empty, yields ''.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]

    best = ''
    if len(data) > 1 and len(data[0]) > 0:
        # Every candidate is a slice of the first string; keep the first one
        # found at each new record length that occurs in all strings.
        reference = data[0]
        total = len(reference)
        for start in range(total):
            for length in range(total - start + 1):
                if length <= len(best):
                    continue
                candidate = reference[start:start + length]
                if all(candidate in item for item in data):
                    best = candidate
    return best
445
445
446
446
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the common quote prefix removed from every line, or
        the input unchanged when some line carries no quote marks or the
        lines share no common quote prefix.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    quote_prefixes = set()
    for line in lines:
        found = re.match(r'^(\s*>[ >]*)', line)
        if found is None:
            # an unquoted line means nothing can be stripped consistently
            return text
        quote_prefixes.add(found.group(1))

    # The characters removed must be the same leading characters on every
    # line, i.e. the common *prefix* of the quote runs.  A common substring
    # is not enough: it may sit at different offsets in different lines.
    prefix = os.path.commonprefix(list(quote_prefixes))
    if prefix:
        strip = len(prefix)
        text = '\n'.join([ ln[strip:] for ln in lines])
    return text
493
493
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only "ESC [ ... m" sequences whose parameters are digits and
    semicolons are removed, including the parameterless form "ESC [ m".

    Parameters
    ----------
    source : str
        Source to remove the ansi from

    Returns
    -------
    str : `source` without those escape sequences.
    """
    # '*' rather than '+': the parameter list may be empty ("\033[m").
    return re.sub(r'\033\[[\d;]*m', '', source)
504
504
505
505
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression, with the
    keyword arguments of the format call as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # NOTE: the field text is handed to eval(), so templates must come
        # from a trusted source.  Positional `args` are not consulted.
        return eval(name, kwargs), name
530
530
531 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
531 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
532 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
532 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
533 # above, it should be possible to remove FullEvalFormatter.
533 # above, it should be possible to remove FullEvalFormatter.
534
534
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    #
    # The public vformat() hook is overridden rather than the private
    # _vformat() helper, so this class does not depend on the helper's
    # signature or return value.
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, evaluating each field in `kwargs`.

        `args` is accepted for interface compatibility but not used: every
        field is evaluated as an expression with `kwargs` as its namespace.
        Returns the result as a unicode string.
        """
        result = []
        for literal_text, field_name, format_spec, conversion in \
                        self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split the field at ':', so glue it back
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # NOTE: eval() means templates must come from a trusted source.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
592
590
593
591
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # group 1: text before the dollar sign (non-greedy)
    # group 2: the name, optionally led by a second '$' (the escape form)
    # Raw string so the backslashes reach the regex engine without relying
    # on Python passing unknown string escapes through unchanged.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")

    def parse(self, fmt_string):
        """Yield ``(literal_text, field_name, format_spec, conversion)`` tuples.

        Wraps ``Formatter.parse``: the literal text of every tuple it
        produces is scanned for ``$name`` patterns, each of which is
        yielded as an additional field (empty format spec, no conversion)
        ahead of the original ``{...}`` field.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1, 2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    # escaped: keep accumulating literal text
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern, with whatever literal text
            # is left after the last $ match
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
634
632
635 #-----------------------------------------------------------------------------
633 #-----------------------------------------------------------------------------
636 # Utils to columnize a list of string
634 # Utils to columnize a list of string
637 #-----------------------------------------------------------------------------
635 #-----------------------------------------------------------------------------
638
636
def _chunks(l, n):
    """Yield consecutive slices of `l`, each holding at most `n` items."""
    total = len(l)
    for start in py3compat.xrange(0, total, n):
        stop = start + n
        yield l[start:stop]
643
641
644
642
645 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
643 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
646 """Calculate optimal info to columnize a list of string"""
644 """Calculate optimal info to columnize a list of string"""
647 for nrow in range(1, len(rlist)+1) :
645 for nrow in range(1, len(rlist)+1) :
648 chk = list(map(max,_chunks(rlist, nrow)))
646 chk = list(map(max,_chunks(rlist, nrow)))
649 sumlength = sum(chk)
647 sumlength = sum(chk)
650 ncols = len(chk)
648 ncols = len(chk)
651 if sumlength+separator_size*(ncols-1) <= displaywidth :
649 if sumlength+separator_size*(ncols-1) <= displaywidth :
652 break;
650 break;
653 return {'columns_numbers' : ncols,
651 return {'columns_numbers' : ncols,
654 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
652 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
655 'rows_numbers' : nrow,
653 'rows_numbers' : nrow,
656 'columns_width' : chk
654 'columns_width' : chk
657 }
655 }
658
656
659
657
660 def _get_or_default(mylist, i, default=None):
658 def _get_or_default(mylist, i, default=None):
661 """return list item number, or default if don't exist"""
659 """return list item number, or default if don't exist"""
662 if i >= len(mylist):
660 if i >= len(mylist):
663 return default
661 return default
664 else :
662 else :
665 return mylist[i]
663 return mylist[i]
666
664
667
665
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs):
    """Return a nested list of items, and the info needed to columnize them.

    Parameters
    ----------

    items
        list of strings to columnize
    empty : (default None)
        value used to fill the cells that have no item
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit.

    Any extra positional or keyword argument is forwarded unchanged to
    `_find_optimal`.

    Returns
    -------

    strings_matrix

        nested list of strings: the outermost list contains as many lists
        as there are rows, and each inner list has as many elements as
        there are columns. If the number of elements in `items` does not
        equal rows*columns, the trailing cells of some rows are filled
        with `empty`.

    dict_info
        some info to make columnize easier:

        columns_numbers
          number of columns
        rows_numbers
          number of rows
        columns_width
          list with the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l,displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
            ['b', 'g', 'l'],
            ['cc', 'h', None],
            ['d', 'i', None],
            ['eeeee', 'j', None]],
            {'columns_numbers': 3,
            'columns_width': [5, 1, 1],
            'optimal_separator_width': 2,
            'rows_numbers': 5})
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, *args, **kwargs)
    nrow = info['rows_numbers']
    ncol = info['columns_numbers']

    # items are laid out column after column, so the cell at (row, col)
    # holds the item at flat position col*nrow + row
    matrix = []
    for row in range(nrow):
        cells = [_get_or_default(items, col * nrow + row, default=empty)
                 for col in range(ncol)]
        matrix.append(cells)
    return (matrix, info)
726
724
727
725
def columnize(items, separator='  ', displaywidth=80):
    """Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string: one line per row, every line (including the
    last) terminated by a newline. An empty `items` gives ``'\\n'``.
    """
    if not items:
        return '\n'

    matrix, info = compute_item_matrix(items, separator_size=len(separator),
                                       displaywidth=displaywidth)
    widths = info['columns_width']

    lines = []
    for row in matrix:
        # drop the filler cells (and any other falsy entry) of short rows
        cells = [cell for cell in row if cell]
        # left-justify every cell to the width of its column
        padded = [cell.ljust(width, ' ') for cell, width in zip(cells, widths)]
        lines.append(separator.join(padded))
    return '\n'.join(lines) + '\n'
752
750
753
751
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`. When `wrap_item_with` is non-empty, it is put before
    and after every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''

    if wrap_item_with:
        wrapped = []
        for item in list_:
            wrapped.append('%s%s%s' % (wrap_item_with, item, wrap_item_with))
        list_ = wrapped

    if count == 1:
        # a lone item is returned as is, without any separator
        return list_[0]

    leading = sep.join(list_[:-1])
    return '%s%s%s' % (leading, last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now