##// END OF EJS Templates
Fix docstrings in utils.text
Thomas Kluyver -
Show More
@@ -1,759 +1,768 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import os
22 import os
23 import re
23 import re
24 import sys
24 import sys
25 import textwrap
25 import textwrap
26 from string import Formatter
26 from string import Formatter
27
27
28 from IPython.external.path import path
28 from IPython.external.path import path
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
30 from IPython.utils import py3compat
30 from IPython.utils import py3compat
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Declarations
33 # Declarations
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
# datetime.strftime date format for ipython.  Windows ('win32') gets the
# plain '%d' day directive; every other platform gets the '%-d' variant.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
41
41
42
42
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44 # Code
44 # Code
45 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
46
46
class LSString(str):
    """String subclass exposing alternate views of its own value.

    Instances behave like ordinary strings and additionally offer:

        .l (or .list) : the value split on newlines, as a list.
        .n (or .nlstr): the original value (the string itself).
        .s (or .spstr): the value with every newline replaced by a space.
        .p (or .paths): path objects for the lines that exist on disk.

    Every view that needs a transformation is computed on first access and
    then cached on the instance.

    These views make it convenient to hand command output back to a shell,
    which typically only understands whitespace-separated arguments."""

    def get_list(self):
        """Return the lines of the string (split on ``'\\n'``), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for each line naming an existing path.

        Lines for which ``os.path.exists`` is false are dropped.  The result
        is cached after the first call.
        """
        try:
            found = self.__paths
        except AttributeError:
            existing = [ln for ln in self.split('\n') if os.path.exists(ln)]
            found = self.__paths = [path(ln) for ln in existing]
        return found

    p = paths = property(get_paths)
94
94
95 # FIXME: We need to reimplement type specific displayhook and then add this
95 # FIXME: We need to reimplement type specific displayhook and then add this
96 # back as a custom printer. This should also be moved outside utils into the
96 # back as a custom printer. This should also be moved outside utils into the
97 # core.
97 # core.
98
98
99 # def print_lsstring(arg):
99 # def print_lsstring(arg):
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
100 # """ Prettier (non-repr-like) and more informative printer for LSString """
101 # print "LSString (.p, .n, .l, .s available). Value:"
101 # print "LSString (.p, .n, .l, .s available). Value:"
102 # print arg
102 # print arg
103 #
103 #
104 #
104 #
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
105 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
106
106
107
107
class SList(list):
    """List subclass exposing alternate views of its own contents.

    Instances behave like ordinary lists and additionally offer:

    * .l (or .list) : the value as a list (the list itself).
    * .n (or .nlstr): the items joined with newlines.
    * .s (or .spstr): the items joined with spaces.
    * .p (or .paths): path objects for the items that exist on disk.

    Every view that needs a transformation is computed on first access and
    then cached on the instance; the cache is not refreshed if the list is
    modified afterwards."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces, cached."""
        try:
            joined = self.__spstr
        except AttributeError:
            joined = self.__spstr = ' '.join(self)
        return joined

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines, cached."""
        try:
            joined = self.__nlstr
        except AttributeError:
            joined = self.__nlstr = '\n'.join(self)
        return joined

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for each item naming an existing path.

        Items for which ``os.path.exists`` is false are dropped.  The result
        is cached after the first call.
        """
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [path(p) for p in self if os.path.exists(p)]
        return found

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """Return all strings matching 'pattern' (a regex or callable).

        A string pattern is searched with ``re.search`` and is
        case-insensitive.  Any other pattern is called with the text to
        test and its truthiness decides the match.  If prune is true,
        return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field; an item that has no such field is
        tested against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Text the predicate is applied to for the item `s`.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x: re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern

        if prune:
            return SList([el for el in self if not pred(match_target(el))])
        return SList([el for el in self if pred(match_target(el))])

    def fields(self, *fields):
        """Collect whitespace-separated fields from string list.

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored: a field missing from a line is skipped, and
        a line that yields no field at all is left out of the result.

        Without args, fields() just split()'s the strings and returns a plain
        list of lists.
        """
        if not fields:
            return [el.split() for el in self]

        res = SList()
        for line in self:
            parts = line.split()
            picked = []
            for fd in fields:
                try:
                    picked.append(parts[fd])
                except IndexError:
                    pass  # this line has no such field
            if picked:
                res.append(" ".join(picked))
        return res

    def sort(self, field=None, nums=False):
        """Return a new SList sorted by the given field (see fields()).

        Unlike ``list.sort`` this does not sort in place.  With ``field`` set
        to None the whole line is the sort key.  With ``nums`` true, the
        digits found in the key are concatenated and compared as an integer
        (0 when the key holds no digits).  Lines with equal keys are ordered
        by the line text itself.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)
        """
        # decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]

        if nums:
            for pair in dsu:
                # The key is a plain string (no field) or an SList of field
                # strings; flatten it so digits are always picked character
                # by character rather than field by field.
                key_text = "".join(pair[0])
                numstr = "".join(ch for ch in key_text if ch.isdigit())
                try:
                    pair[0] = int(numstr)
                except ValueError:
                    pair[0] = 0

        return SList([line for _, line in sorted(dsu)])
251
252
252
253
253 # FIXME: We need to reimplement type specific displayhook and then add this
254 # FIXME: We need to reimplement type specific displayhook and then add this
254 # back as a custom printer. This should also be moved outside utils into the
255 # back as a custom printer. This should also be moved outside utils into the
255 # core.
256 # core.
256
257
257 # def print_slist(arg):
258 # def print_slist(arg):
258 # """ Prettier (non-repr-like) and more informative printer for SList """
259 # """ Prettier (non-repr-like) and more informative printer for SList """
259 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 # arg.hideonce = False
262 # arg.hideonce = False
262 # return
263 # return
263 #
264 #
264 # nlprint(arg) # This was a nested list printer, now removed.
265 # nlprint(arg) # This was a nested list printer, now removed.
265 #
266 #
266 # print_slist = result_display.when_type(SList)(print_slist)
267 # print_slist = result_display.when_type(SList)(print_slist)
267
268
268
269
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t' * ntabs + ' ' * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # The multiline '^' also matches after a final newline, which leaves a
    # dangling indent at the very end; drop it.  The check uses '\n' rather
    # than os.linesep so it also works for '\n'- and '\r\n'-terminated text on
    # Windows.  The `ind` guard matters because outstr[:-0] would be ''.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
306
307
307
308
def list_strings(arg):
    """Wrap a lone string in a list; pass anything else through unchanged.

    This lets callers accept either a single string or a list of strings and
    always iterate over the result.

    :Examples:
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    if isinstance(arg, py3compat.string_types):
        return [arg]
    return arg
326
328
327
329
def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    The text is surrounded by a space and an equal run of `mark` on each
    side, sized so the result is about `width` characters.  With empty
    `txt`, a line of `mark` cut to exactly `width` characters is returned.
    If the text is too long to leave room for any marks, it is returned
    with just the surrounding spaces.

    :Examples:
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Two columns are reserved for the spaces around the text; the rest is
    # shared evenly between both sides, in whole repetitions of `mark`.
    per_side = max(0, (width - len(txt) - 2) // len(mark) // 2)
    border = mark * per_side
    return '%s %s %s' % (border, txt, border)
349
352
350
353
# Matches the run of whitespace (if any) at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string.

    Any character matched by the regex class ``\\s`` counts, not only the
    space character.  Returns 0 when the string does not start with
    whitespace.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
361
364
362
365
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently a backslash that
    sits at the end of a line (a paragraph-continue mark) is deleted.
    """
    # Paragraph continue
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
371
374
372
375
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    where the opening line carries no indentation but the remaining lines
    do.  Text that is a single line, or that starts with a newline, is
    passed to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')
    if not first or not newline:
        # Either the text opens with a blank line (so the first line must
        # not be ignored) or there is only one line: plain dedent suffices.
        return textwrap.dedent(text)

    # dedent everything but the first line
    return '\n'.join([first, textwrap.dedent(rest)])
398
401
399
402
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    The text is dedented and stripped, split into paragraphs at empty (or
    whitespace-only) lines, and each paragraph is re-filled with
    textwrap.fill.  A paragraph that still contains an indented line after
    dedenting is left exactly as it is.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)

    cleaned = dedent(text).strip()
    # The separator pattern has one capture group, so split() interleaves the
    # captured separators with the paragraphs; keep only the paragraphs.
    paragraphs = paragraph_re.split(cleaned)[::2]

    # presume indentation that survives dedent is meaningful formatting,
    # so don't fill unless text is flush.
    return [
        textwrap.fill(p, ncols) if indent_re.search(p) is None else p
        for p in paragraphs
    ]
424
427
425
428
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Candidates are taken from the first string; when several common
    substrings share the maximum length, the one starting earliest in the
    first string wins.  An empty list gives '', and a one-element list gives
    that element.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    count = len(data)
    if count == 0:
        return ''
    if count == 1:
        return data[0]

    reference = data[0]
    best = ''
    for start in range(len(reference)):
        # Only slices strictly longer than the current best can improve it.
        for stop in range(start + len(best) + 1, len(reference) + 1):
            candidate = reference[start:stop]
            if all(candidate in other for other in data):
                best = candidate
    return best
440
443
441
444
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* at the start of all lines of the input text.  The
    lines of the result are re-joined with '\\n'.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the common quote prefix removed, or the input
        unchanged when there is no such common prefix.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefixes = set()
    for line in lines:
        quoted = re.match(r'^(\s*>[ >]*)', line)
        if quoted is None:
            # One unquoted line means there is nothing common to strip.
            return text
        prefixes.add(quoted.group(1))

    # Use the common *prefix* of the quote runs: a merely common substring
    # may sit at different offsets in different lines, and cutting that many
    # characters off every line would then remove the wrong text.
    prefix = os.path.commonprefix(list(prefixes))
    if prefix:
        strip = len(prefix)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
488
491
489
492
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression, with the
    keyword arguments of the format call as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field text and return a ``(value, name)`` pair.

        `name` is the raw field expression; `kwargs` serves as the globals
        of the evaluation and positional `args` are not consulted.
        """
        # NOTE: eval() runs arbitrary expressions, so templates must come
        # from a trusted source.
        value = eval(name, kwargs)
        return value, name
517
514
518
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression in the
    namespace given by the keyword arguments of the format call.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, returning the resulting unicode string.

        Overridden so the public entry points always use this class's own
        `_vformat` contract (a plain string result).  Newer versions of
        ``string.Formatter.vformat`` expect `_vformat` to return a
        ``(result, auto_arg_index)`` tuple and fail on a bare string.
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Build the formatted text by evaluating each field with eval().

        `kwargs` is the namespace for evaluation; `args` and `used_args` are
        accepted for signature compatibility but are not consulted here.

        Raises ValueError when `recursion_depth` is negative.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split the field at ':', so glue the two
                    # halves back together and evaluate them as one expression
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # NOTE: eval() runs arbitrary expressions, so templates must
                # come from a trusted source.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                # (always with an empty format spec, see above)
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
571
576
572
577
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: u'23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: u'1 or 2'
    """
    # Group 1: text before the dollar sign (non-greedy).
    # Group 2: the name, made of word characters and dots, optionally
    # preceded by a second dollar sign (the escape form).
    # A raw string keeps the regex escapes from being read as (invalid)
    # string escape sequences.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")

    def parse(self, fmt_string):
        """Yield ``(literal_text, field_name, format_spec, conversion)`` tuples.

        The string is first split by ``Formatter.parse``; each literal
        chunk is then scanned for ``$name`` patterns, which are yielded as
        extra fields with an empty format spec and no conversion.
        """
        for literal_txt, field_name, format_spec, conversion \
                    in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1, 2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    # Escaped: keep accumulating literal text.
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern, preceded by whatever
            # literal text is left after the last $ match.
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
611
618
612 #-----------------------------------------------------------------------------
619 #-----------------------------------------------------------------------------
613 # Utils to columnize a list of strings
620 # Utils to columnize a list of strings
614 #-----------------------------------------------------------------------------
621 #-----------------------------------------------------------------------------
615
622
def _chunks(l, n):
    """Yield successive n-sized chunks from l.

    Chunks are slices ``l[start:start + n]``, so the last one is shorter
    when ``len(l)`` is not a multiple of `n`.
    """
    total = len(l)
    for start in py3compat.xrange(0, total, n):
        yield l[start:start + n]
620
627
621
628
622 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
629 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
623 """Calculate optimal info to columnize a list of string"""
630 """Calculate optimal info to columnize a list of string"""
624 for nrow in range(1, len(rlist)+1) :
631 for nrow in range(1, len(rlist)+1) :
625 chk = list(map(max,_chunks(rlist, nrow)))
632 chk = list(map(max,_chunks(rlist, nrow)))
626 sumlength = sum(chk)
633 sumlength = sum(chk)
627 ncols = len(chk)
634 ncols = len(chk)
628 if sumlength+separator_size*(ncols-1) <= displaywidth :
635 if sumlength+separator_size*(ncols-1) <= displaywidth :
629 break;
636 break;
630 return {'columns_numbers' : ncols,
637 return {'columns_numbers' : ncols,
631 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
638 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
632 'rows_numbers' : nrow,
639 'rows_numbers' : nrow,
633 'columns_width' : chk
640 'columns_width' : chk
634 }
641 }
635
642
636
643
637 def _get_or_default(mylist, i, default=None):
644 def _get_or_default(mylist, i, default=None):
638 """return list item number, or default if don't exist"""
645 """return list item number, or default if don't exist"""
639 if i >= len(mylist):
646 if i >= len(mylist):
640 return default
647 return default
641 else :
648 else :
642 return mylist[i]
649 return mylist[i]
643
650
644
651
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each
        column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------
    strings_matrix
        nested list of strings; the outermost list contains as many lists
        as rows, and each inner list has as many elements as columns.
        Items are laid out column by column.  If the total number of
        elements in `items` does not equal the product of rows*columns,
        the last elements of some lists are filled with `empty`.
    dict_info
        some info to make columnize easier:

        columns_numbers
            number of columns
        rows_numbers
            number of rows
        columns_width
            list of the width of each column
        optimal_separator_width
            best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l,displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
              ['b', 'g', 'l'],
              ['cc', 'h', None],
              ['d', 'i', None],
              ['eeeee', 'j', None]],
             {'columns_numbers': 3,
              'columns_width': [5, 1, 1],
              'optimal_separator_width': 2,
              'rows_numbers': 5})
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, *args, **kwargs)
    nrow = info['rows_numbers']
    ncol = info['columns_numbers']

    matrix = []
    for row in range(nrow):
        # Column-major layout: the item at (row, col) sits at index
        # col*nrow + row of the flat list.
        matrix.append([_get_or_default(items, col * nrow + row, default=empty)
                       for col in range(ncol)])
    return (matrix, info)
701
710
702
711
def columnize(items, separator='  ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string: one line per row, each terminated by a newline.
    An empty `items` gives a single newline.
    """
    if not items :
        return '\n'
    matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['columns_width']

    lines = []
    for row in matrix:
        # Drop falsy cells: the None padding of incomplete rows (and also
        # any empty strings among the items).
        cells = [cell for cell in row if cell]
        # Pad each cell to the width of its column before joining.
        lines.append(separator.join([cell.ljust(w, ' ') for cell, w in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
727
736
728
737
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`. When `wrap_item_with` is non-empty, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not len(list_):
        return ''

    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                 for entry in list_]

    # A single item needs no separator at all.
    if len(list_) == 1:
        return list_[0]

    leading, final = list_[:-1], list_[-1]
    return '%s%s%s' % (sep.join(leading), last_sep, final)
General Comments 0
You need to be logged in to leave comments. Login now