##// END OF EJS Templates
Squash a couple more Sphinx warnings
Thomas Kluyver -
Show More
@@ -1,768 +1,770 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import os
22 import os
23 import re
23 import re
24 import sys
24 import sys
25 import textwrap
25 import textwrap
26 from string import Formatter
26 from string import Formatter
27
27
28 from IPython.external.path import path
28 from IPython.external.path import path
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
30 from IPython.utils import py3compat
30 from IPython.utils import py3compat
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Declarations
33 # Declarations
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
# strftime pattern used by IPython for dates.  The '%-d' directive (day of
# month without zero padding) is not used on win32, which gets '%d' instead.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
41
41
42
42
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44 # Code
44 # Code
45 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
46
46
class LSString(str):
    """String subclass exposing a few convenience views of its value.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects

    Views that need a transformation are computed on first access and the
    result is stored on the instance for later accesses.

    Such strings are handy when talking to the shell, which typically only
    understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the lines of the string (split on '\\n'), cached."""
        if not hasattr(self, '_LSString__list'):
            self.__list = self.split('\n')
        return self.__list

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with every newline replaced by a space, cached."""
        if not hasattr(self, '_LSString__spstr'):
            self.__spstr = self.replace('\n', ' ')
        return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines naming existing paths, cached."""
        if not hasattr(self, '_LSString__paths'):
            # Only lines for which os.path.exists() is true are kept.
            existing = (ln for ln in self.split('\n') if os.path.exists(ln))
            self.__paths = [path(ln) for ln in existing]
        return self.__paths

    p = paths = property(get_paths)
94
94
95 # FIXME: We need to reimplement type specific displayhook and then add this
95 # FIXME: We need to reimplement type specific displayhook and then add this
96 # back as a custom printer. This should also be moved outside utils into the
96 # back as a custom printer. This should also be moved outside utils into the
97 # core.
97 # core.
98
98
99 # def print_lsstring(arg):
99 # def print_lsstring(arg):
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
101 # print "LSString (.p, .n, .l, .s available). Value:"
101 # print "LSString (.p, .n, .l, .s available). Value:"
102 # print arg
102 # print arg
103 #
103 #
104 #
104 #
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
106
106
107
107
class SList(list):
    """List subclass exposing a few convenience views of its contents.

    Instances behave like ordinary lists and additionally provide:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects

    Views that need a transformation are computed on first access and the
    result is stored on the instance for later accesses."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces, cached."""
        if not hasattr(self, '_SList__spstr'):
            self.__spstr = ' '.join(self)
        return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines, cached."""
        if not hasattr(self, '_SList__nlstr'):
            self.__nlstr = '\n'.join(self)
        return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items naming existing paths, cached."""
        if not hasattr(self, '_SList__paths'):
            # Only items for which os.path.exists() is true are kept.
            existing = (item for item in self if os.path.exists(item))
            self.__paths = [path(item) for item in existing]
        return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        String patterns are searched case-insensitively.  If prune is true,
        the items NOT matching the pattern are returned instead.

        If field is specified, the match is attempted only against that
        whitespace-separated field of each item; items that do not have
        such a field are matched against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(item):
            # Text the predicate is applied to for one list item.
            if field is None:
                return item
            words = item.split()
            try:
                return words[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            def pred(candidate):
                return re.search(pattern, candidate, re.IGNORECASE)
        else:
            pred = pattern

        keep_matches = not prune
        return SList([el for el in self
                      if bool(pred(match_target(el))) == keep_matches])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        Indices that a line does not have are skipped silently; a line
        contributing no field at all is left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [el.split() for el in self]

        collected = SList()
        for line in self:
            words = line.split()
            chosen = []
            for idx in fields:
                try:
                    chosen.append(words[idx])
                except IndexError:
                    continue
            if chosen:
                collected.append(" ".join(chosen))
        return collected

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the receiver is not modified.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        def sort_key(line):
            # Key half of the decorate-sort-undecorate pair for one line.
            if field is None:
                key = line
            else:
                key = SList([line]).fields(field)
            if nums:
                digits = "".join(ch for ch in key if ch.isdigit())
                try:
                    key = int(digits)
                except ValueError:
                    key = 0
            return key

        # decorate, sort, undecorate; ties on the key fall back to the line
        decorated = [[sort_key(line), line] for line in self]
        decorated.sort()
        return SList([pair[1] for pair in decorated])
252
252
253
253
254 # FIXME: We need to reimplement type specific displayhook and then add this
254 # FIXME: We need to reimplement type specific displayhook and then add this
255 # back as a custom printer. This should also be moved outside utils into the
255 # back as a custom printer. This should also be moved outside utils into the
256 # core.
256 # core.
257
257
258 # def print_slist(arg):
258 # def print_slist(arg):
259 # """ Prettier (non-repr-like) and more informative printer for SList """
259 # """ Prettier (non-repr-like) and more informative printer for SList """
260 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
261 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 # if hasattr(arg, 'hideonce') and arg.hideonce:
262 # arg.hideonce = False
262 # arg.hideonce = False
263 # return
263 # return
264 #
264 #
265 # nlprint(arg) # This was a nested list printer, now removed.
265 # nlprint(arg) # This was a nested list printer, now removed.
266 #
266 #
267 # print_slist = result_display.when_type(SList)(print_slist)
267 # print_slist = result_display.when_type(SList)(print_slist)
268
268
269
269
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a final newline, which
    # leaves a dangling indent at the very end of the output; remove it.
    # The check is skipped for an empty indent: outstr[:-0] would be the
    # empty string and the whole result would be lost.  A literal '\n' is
    # tested (rather than os.linesep) because that is the character '^'
    # anchors on, whatever the platform.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
307
307
308
308
def list_strings(arg):
    """Wrap a lone string in a list; return anything else unchanged.

    Intended for arguments that may be given either as a single string or
    as a list of strings.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, py3compat.string_types) else arg
328
329
329
330
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is just `mark` repeated and cut to `width`
    characters.  Otherwise the text is surrounded by one space and an equal
    number of `mark` repetitions on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Room left after the text and its two padding spaces, measured in whole
    # marks and halved for each side; never negative.
    per_side = max(0, (width-len(txt)-2)//len(mark)//2)
    border = mark*per_side
    return '%s %s %s' % (border,txt,border)
352
354
353
355
# Matches the run of whitespace (if any) at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the length of the leading whitespace run of `strng`.

    Any character matched by the regex class ``\\s`` counts, not only the
    space character.  Returns 0 when the string does not start with
    whitespace.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
364
366
365
367
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation marker, a backslash at the end of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
374
376
375
377
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # A single-line text, or a text whose first line is empty (it starts with
    # a newline), is handed to textwrap.dedent in one piece.
    if not newline or not first:
        return textwrap.dedent(text)

    # Otherwise the first line is kept verbatim and only the remainder is
    # dedented.
    return first + '\n' + textwrap.dedent(rest)
401
403
402
404
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    The text is dedented and stripped first.  A paragraph that still has an
    indented line afterwards is assumed to carry meaningful formatting and
    is returned as is instead of being re-filled.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    blank_run_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_line_re = re.compile(r'\n\s+', re.MULTILINE)

    # split() interleaves the captured separator with the paragraphs, so the
    # paragraphs themselves sit at the even indices.
    pieces = blank_run_re.split(dedent(text).strip())
    return [textwrap.fill(para, ncols)
            if indented_line_re.search(para) is None else para
            for para in pieces[::2]]
427
429
428
430
def long_substr(data):
    """Return the longest common substring in a list of strings.

    A one-element list yields that element; an empty list, or a list whose
    first string is empty, yields ''.  Among equally long candidates the one
    occurring first in ``data[0]`` is returned.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]
    if len(data) < 2 or len(data[0]) == 0:
        return ''

    reference = data[0]
    best = ''
    # Try every slice of the first string; keep it when it is strictly longer
    # than the current best and contained in every string.
    for start in range(len(reference)):
        for length in range(len(reference) - start + 1):
            if length <= len(best):
                continue
            candidate = reference[start:start + length]
            if all(candidate in other for other in data):
                best = candidate
    return best
443
445
444
446
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    quote_re = re.compile(r'^(\s*>[ >]*)')
    found = [quote_re.match(ln) for ln in lines]

    # A single line without quote marks means the text is returned untouched.
    if not all(found):
        return text

    quote_prefixes = set(m.group(1) for m in found)
    common = long_substr(list(quote_prefixes))
    if common:
        cut = len(common)
        text = '\n'.join([ln[cut:] for ln in lines])
    return text
491
493
492
494
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is passed to eval() with the keyword arguments
    of the format call as its namespace, so templates must come from a
    trusted source.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # The field text is evaluated as an expression; positional args are
        # not consulted.  The second item is what Formatter records as the
        # "used" key.
        return eval(name, kwargs), name
517
519
518
520
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Field contents are passed to eval(), so templates must come from a
    trusted source.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, evaluating each field as an expression.

        This public entry point is overridden so that formatting does not
        depend on how the base class drives the private ``_vformat`` hook:
        the number of arguments passed to it and the shape of its return
        value are implementation details of ``string.Formatter``.  The
        steps mirror the classic ``Formatter.vformat``.
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing: the parser
                    # split the field at the first ':', so glue it back
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result; the format
                # spec is always empty because ':' is reserved for slicing
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
576
578
577
579
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # group(1): text leading up to the dollar sign (non-greedy);
    # group(2): the dotted name, with an optional extra leading '$' that
    # marks the escaped '$$name' form.
    # Raw string: '\$', '\w' and '\.' are regex escapes, not string escapes.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")

    def parse(self, fmt_string):
        """Yield ``(literal_text, field_name, format_spec, conversion)`` tuples.

        Each tuple produced by ``Formatter.parse`` is post-processed: every
        ``$name`` found in its literal text is yielded as an extra field
        (empty format spec, no conversion) before the original ``{...}``
        field is re-yielded with whatever literal text remains.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1, 2)
                if new_field.startswith("$"):
                    # $$foo --> literal $foo, kept as pending text.
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern, preceded by any literal text
            # left over after the last $-match.
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
618
620
619 #-----------------------------------------------------------------------------
621 #-----------------------------------------------------------------------------
620 # Utils to columnize a list of string
622 # Utils to columnize a list of string
621 #-----------------------------------------------------------------------------
623 #-----------------------------------------------------------------------------
622
624
def _chunks(l, n):
    """Generate consecutive slices of ``l`` holding at most ``n`` items each.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    total = len(l)
    for start in py3compat.xrange(0, total, n):
        stop = start + n
        yield l[start:stop]
627
629
628
630
629 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
631 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
630 """Calculate optimal info to columnize a list of string"""
632 """Calculate optimal info to columnize a list of string"""
631 for nrow in range(1, len(rlist)+1) :
633 for nrow in range(1, len(rlist)+1) :
632 chk = list(map(max,_chunks(rlist, nrow)))
634 chk = list(map(max,_chunks(rlist, nrow)))
633 sumlength = sum(chk)
635 sumlength = sum(chk)
634 ncols = len(chk)
636 ncols = len(chk)
635 if sumlength+separator_size*(ncols-1) <= displaywidth :
637 if sumlength+separator_size*(ncols-1) <= displaywidth :
636 break;
638 break;
637 return {'columns_numbers' : ncols,
639 return {'columns_numbers' : ncols,
638 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
640 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
639 'rows_numbers' : nrow,
641 'rows_numbers' : nrow,
640 'columns_width' : chk
642 'columns_width' : chk
641 }
643 }
642
644
643
645
644 def _get_or_default(mylist, i, default=None):
646 def _get_or_default(mylist, i, default=None):
645 """return list item number, or default if don't exist"""
647 """return list item number, or default if don't exist"""
646 if i >= len(mylist):
648 if i >= len(mylist):
647 return default
649 return default
648 else :
650 else :
649 return mylist[i]
651 return mylist[i]
650
652
651
653
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items
        list of strings to columnize
    empty : (default None)
        value used to fill the matrix cells that have no item
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Extra positional and keyword arguments are forwarded unchanged to
    ``_find_optimal``.

    Returns
    -------

    strings_matrix

        nested list of strings; the outermost list contains as many lists as
        rows, and each inner list has as many elements as columns. Items fill
        the matrix column by column. If the total number of elements in
        `items` does not equal the product rows*columns, the trailing cells
        are filled with `empty`.

    dict_info
        some info to make columnize easier:

        columns_numbers
            number of columns
        rows_numbers
            number of rows
        columns_width
            list of the width of each column
        optimal_separator_width
            best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l,displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
            ['b', 'g', 'l'],
            ['cc', 'h', None],
            ['d', 'i', None],
            ['eeeee', 'j', None]],
            {'columns_numbers': 3,
            'columns_width': [5, 1, 1],
            'optimal_separator_width': 2,
            'rows_numbers': 5})
    """
    item_lengths = [len(item) for item in items]
    info = _find_optimal(item_lengths, *args, **kwargs)
    nrow = info['rows_numbers']
    ncol = info['columns_numbers']

    matrix = []
    for row in range(nrow):
        # Column-major layout: the item at (row, col) sits at col*nrow + row.
        cells = [_get_or_default(items, col * nrow + row, default=empty)
                 for col in range(ncol)]
        matrix.append(cells)
    return (matrix, info)
710
712
711
713
def columnize(items, separator='  ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string: one line per row, each terminated by a newline.
    An empty ``items`` gives a single newline.
    """
    if not items :
        return '\n'
    matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['columns_width']

    lines = []
    for row in matrix:
        # Drop only the None padding of incomplete rows.  Filtering on
        # truthiness would also remove genuine empty-string items and shift
        # the cells after them into the wrong columns.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join([cell.ljust(width, ' ')
                                     for cell, width in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
736
738
737
739
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`. When `wrap_item_with` is non-empty, every item is
    surrounded by it on both sides.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''
    if wrap_item_with:
        wrapped = []
        for item in list_:
            wrapped.append('%s%s%s' % (wrap_item_with, item, wrap_item_with))
        list_ = wrapped
    if count == 1:
        return list_[0]
    leading, final = list_[:-1], list_[-1]
    return '%s%s%s' % (sep.join(leading), last_sep, final)
General Comments 0
You need to be logged in to leave comments. Login now