##// END OF EJS Templates
generalizing get_text_list helper
Martín Gaitán -
Show More
@@ -1,755 +1,759 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import os
22 import os
23 import re
23 import re
24 import sys
24 import sys
25 import textwrap
25 import textwrap
26 from string import Formatter
26 from string import Formatter
27
27
28 from IPython.external.path import path
28 from IPython.external.path import path
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
29 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
30 from IPython.utils import py3compat
30 from IPython.utils import py3compat
31
31
32
32
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34 # Declarations
34 # Declarations
35 #-----------------------------------------------------------------------------
35 #-----------------------------------------------------------------------------
36
36
# datetime.strftime date format for ipython: win32 gets the zero-padded
# '%d' day directive, every other platform gets '%-d'.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
42
42
43
43
44 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
45 # Code
45 # Code
46 #-----------------------------------------------------------------------------
46 #-----------------------------------------------------------------------------
47
47
class LSString(str):
    """String subclass exposing shell-friendly views of its own value.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects

    Every derived view is computed on first access and then stored on the
    instance, so repeated access returns the same object.

    These views make it convenient to hand command output back to the
    shell, which generally expects whitespace-separated arguments."""

    def get_list(self):
        """Return the lines of the string (split on '\\n'), cached."""
        if not hasattr(self, '_LSString__list'):
            self.__list = self.split('\n')
        return self.__list

    list = property(get_list)
    l = list

    def get_spstr(self):
        """Return the string with newlines replaced by spaces, cached."""
        if not hasattr(self, '_LSString__spstr'):
            self.__spstr = self.replace('\n', ' ')
        return self.__spstr

    spstr = property(get_spstr)
    s = spstr

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    nlstr = property(get_nlstr)
    n = nlstr

    def get_paths(self):
        """Return path objects for the lines naming existing paths, cached."""
        if not hasattr(self, '_LSString__paths'):
            # Lines that do not exist on disk are silently dropped.
            self.__paths = [path(entry) for entry in self.split('\n')
                            if os.path.exists(entry)]
        return self.__paths

    paths = property(get_paths)
    p = paths
95
95
96 # FIXME: We need to reimplement type specific displayhook and then add this
96 # FIXME: We need to reimplement type specific displayhook and then add this
97 # back as a custom printer. This should also be moved outside utils into the
97 # back as a custom printer. This should also be moved outside utils into the
98 # core.
98 # core.
99
99
100 # def print_lsstring(arg):
100 # def print_lsstring(arg):
101 # """ Prettier (non-repr-like) and more informative printer for LSString """
101 # """ Prettier (non-repr-like) and more informative printer for LSString """
102 # print "LSString (.p, .n, .l, .s available). Value:"
102 # print "LSString (.p, .n, .l, .s available). Value:"
103 # print arg
103 # print arg
104 #
104 #
105 #
105 #
106 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
106 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
107
107
108
108
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

        .l (or .list) : value as list (the list itself).
        .n (or .nlstr): value as a string, joined on newlines.
        .s (or .spstr): value as a string, joined on spaces.
        .p (or .paths): list of path objects

    Any values which require transformations are computed only once and
    cached.  The cache is never invalidated, so the derived values are not
    refreshed if the list is mutated after the first access."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively with ``re.search``;
        anything else is called as a predicate on the match target.  If
        prune is true, return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field (items lacking that field are matched
        against the empty string).

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        # (str, type(u'')) is (str, unicode) on Python 2 -- exactly what
        # ``basestring`` covered -- and also works on Python 3, where
        # ``basestring`` no longer exists.
        if isinstance(pattern, (str, type(u''))):
            pred = lambda x: re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if prune:
            return SList([el for el in self if not pred(match_target(el))])
        return SList([el for el in self if pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::
            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        a.fields(0) is ['-rwxrwxrwx', 'drwxrwxrwx+']
        a.fields(1,0) is ['1 -rwxrwxrwx', '6 drwxrwxrwx+']
        (note the joining by space).
        a.fields(-1) is ['ChangeLog', 'IPython']

        IndexErrors are ignored; a line contributing no field at all is
        left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []
            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))
        return res

    def sort(self, field=None, nums=False):
        """ sort by specified fields (see fields())

        Returns a new SList; the list itself is left untouched.

        Example::
            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3).
        With nums, the key is the integer formed by the digits found in
        the field (or in the whole line when no field is given); a key
        without any digit counts as 0.
        """

        # decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                key = entry[0]
                if field is not None:
                    # fields() returned a list of strings: flatten it so
                    # digits are filtered per character, exactly as for
                    # the whole-line case.
                    key = " ".join(key)
                numstr = "".join([ch for ch in key if ch.isdigit()])
                try:
                    entry[0] = int(numstr)
                except ValueError:
                    entry[0] = 0

        dsu.sort()
        return SList([t[1] for t in dsu])
251
251
252
252
253 # FIXME: We need to reimplement type specific displayhook and then add this
253 # FIXME: We need to reimplement type specific displayhook and then add this
254 # back as a custom printer. This should also be moved outside utils into the
254 # back as a custom printer. This should also be moved outside utils into the
255 # core.
255 # core.
256
256
257 # def print_slist(arg):
257 # def print_slist(arg):
258 # """ Prettier (non-repr-like) and more informative printer for SList """
258 # """ Prettier (non-repr-like) and more informative printer for SList """
259 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
259 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 # if hasattr(arg, 'hideonce') and arg.hideonce:
260 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 # arg.hideonce = False
261 # arg.hideonce = False
262 # return
262 # return
263 #
263 #
264 # nlprint(arg) # This was a nested list printer, now removed.
264 # nlprint(arg) # This was a nested list printer, now removed.
265 #
265 #
266 # print_slist = result_display.when_type(SList)(print_slist)
266 # print_slist = result_display.when_type(SList)(print_slist)
267
267
268
268
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return None
    ind = '\t' * ntabs + ' ' * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # '^' also matches after a final newline, which leaves a dangling indent
    # at the very end of the text; drop it.  The `ind` guard matters: with an
    # empty indent, outstr[:-0] would throw the whole string away.  Both
    # '\n' and os.linesep are accepted so '\n'-terminated text is handled on
    # every platform.
    if ind and outstr.endswith(('\n' + ind, os.linesep + ind)):
        return outstr[:-len(ind)]
    return outstr
306
306
307
307
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A single string is wrapped in a one-element list; any other argument is
    returned unchanged.

    :Examples:

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    # (str, type(u'')) is (str, unicode) on Python 2 -- the same set of types
    # ``basestring`` covers -- and stays valid on Python 3, where
    # ``basestring`` is not defined.
    if isinstance(arg, (str, type(u''))):
        return [arg]
    return arg
326
326
327
327
def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is `mark` repeated and cut to `width`
    characters.  Otherwise `txt` is surrounded by one space and an equal
    number of `mark` repetitions on each side, as many as fit in `width`.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if txt:
        # Room left after the text and its two padding spaces, counted in
        # whole marks and split evenly between both sides (never negative).
        per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
        border = mark * per_side
        return '%s %s %s' % (border, txt, border)
    filler = mark * width
    return filler[:width]
349
349
350
350
# Leading run of whitespace (any kind, not only blanks) at the string start.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string."""
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
361
361
362
362
def format_screen(strng):
    """Format a string for screen printing.

    Strips the latex-style paragraph-continuation marker: a backslash that
    sits at the very end of a line."""
    continuation = re.compile(r'\\$', re.MULTILINE)
    return continuation.sub('', strng)
371
371
372
372
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    A docstring-like text whose first line starts right after the opening
    quotes is still dedented:
    '''foo
    is a bar
    '''

    Text that begins with a newline, or that consists of a single line, is
    passed to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    if text.startswith('\n') or '\n' not in text:
        # Blank first line, or nothing after the first line: there is no
        # special first line to set aside.
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent only what follows it.
    head, _, tail = text.partition('\n')
    return head + '\n' + textwrap.dedent(tail)
398
398
399
399
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Parameters
    ----------

    text : str
        The text to wrap; it is dedented and stripped first.
    ncols : int (default: 80)
        The width passed to textwrap.fill.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)

    # The separator pattern has one capture group, so split() alternates
    # paragraph, separator, paragraph, ...: the paragraphs are the even items.
    chunks = paragraph_re.split(dedent(text).strip())

    # Indentation that survived the dedent is presumed to be meaningful
    # formatting, so only flush paragraphs are re-filled.
    return [para if indent_re.search(para) is not None
            else textwrap.fill(para, ncols)
            for para in chunks[::2]]
424
424
425
425
def long_substr(data):
    """Return the longest common substring in a list of strings.

    An empty list gives '', a single-item list gives that item.  Among
    equally long candidates the one occurring first in data[0] wins.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]
    if len(data) == 0:
        return ''

    reference = data[0]
    best = ''
    for start in range(len(reference)):
        # Only candidates strictly longer than the current best can win.
        for size in range(len(best) + 1, len(reference) - start + 1):
            candidate = reference[start:start + size]
            if all(candidate in item for item in data):
                best = candidate
    return best
440
440
441
441
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text without the quote prefix shared by all its lines.  The
        input is returned unchanged when any line is unquoted or when the
        lines share no leading quote characters.  When a prefix is stripped
        the lines are re-joined with '\\n'.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefixes = set()
    for line in lines:
        quoted = re.match(r'^(\s*>[ >]*)', line)
        if quoted is None:
            # One unquoted line means no quoting is common to all lines.
            return text
        prefixes.add(quoted.group(1))

    # What may be stripped is the leading part shared by every quote prefix.
    # A longest common *substring* is not enough: it can sit at different
    # offsets in different lines, and cutting that many characters would
    # remove text that is not a shared quote.
    common = os.path.commonprefix(list(prefixes))
    if common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
488
488
489
489
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression, with the
    keyword arguments of the format call as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------

    In [1]: f = EvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out [2]: '2'

    In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
    Out [3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # The field text is eval()'d with kwargs as globals, so templates
        # must come from a trusted source.  Positional args are not consulted.
        return eval(name, kwargs), name
513
513
514
514
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------

    In [1]: f = FullEvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('{list(range(5))[2:4]}')
    Out[3]: u'[2, 3]'

    In [4]: f.format('{3*2}')
    Out[4]: u'6'
    """
    def vformat(self, format_string, args, kwargs):
        """Format `format_string` and return the resulting unicode string.

        Mirrors the classic ``Formatter.vformat``.  It is overridden because
        newer versions of the base method unpack a ``(result, index)`` tuple
        from ``_vformat``, whereas the ``_vformat`` below returns just the
        string.
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Build the formatted unicode string, eval()-ing every field.

        Raises ValueError when `recursion_depth` is negative.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing: the parser
                    # split the field at the first ':', so glue it back.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted (kwargs is the namespace, so templates
                # must come from a trusted source)
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result; the format
                # spec was consumed above, so none is applied here
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
571
571
572
572
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as the
    literal text ``$foo``.

    Examples
    --------
    In [1]: f = DollarFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('23 * 76 is $result', result=23*76)
    Out[3]: u'23 * 76 is 1748'

    In [4]: f.format('$a or {b}', a=1, b=2)
    Out[4]: u'1 or 2'
    """
    # Group 1 is the literal text preceding a dollar sign; group 2 is the
    # dotted name that follows it (with a leading '$' when the dollar was
    # doubled).  DOTALL lets group 1 span newlines: without it, text on the
    # lines before a match would be skipped by the regex search and lost.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)

    def parse(self, fmt_string):
        """Split `fmt_string` into fields, also recognising $name fields.

        Yields the same ``(literal_text, field_name, format_spec,
        conversion)`` tuples as ``string.Formatter.parse``.  Each ``$name``
        found inside a literal segment is yielded as an extra field with an
        empty format spec and no conversion.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            # Literal text accumulated since the last field was yielded.
            pending = ""
            for match in self._dollar_pattern.finditer(literal_txt):
                before, name = match.group(1, 2)
                if name.startswith("$"):
                    # $$foo --> $foo : keep it as literal text.
                    pending += before + name
                else:
                    yield (pending + before, name, "", None)
                    pending = ""
                continue_from = match.end()

            # Re-yield the {foo} style pattern, preceded by whatever literal
            # text remains after the last $name match.
            yield (pending + literal_txt[continue_from:], field_name,
                   format_spec, conversion)
611
611
612 #-----------------------------------------------------------------------------
612 #-----------------------------------------------------------------------------
613 # Utils to columnize a list of string
613 # Utils to columnize a list of string
614 #-----------------------------------------------------------------------------
614 #-----------------------------------------------------------------------------
615
615
616 def _chunks(l, n):
616 def _chunks(l, n):
617 """Yield successive n-sized chunks from l."""
617 """Yield successive n-sized chunks from l."""
618 for i in xrange(0, len(l), n):
618 for i in xrange(0, len(l), n):
619 yield l[i:i+n]
619 yield l[i:i+n]
620
620
621
621
622 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
622 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
623 """Calculate optimal info to columnize a list of string"""
623 """Calculate optimal info to columnize a list of string"""
624 for nrow in range(1, len(rlist)+1) :
624 for nrow in range(1, len(rlist)+1) :
625 chk = map(max,_chunks(rlist, nrow))
625 chk = map(max,_chunks(rlist, nrow))
626 sumlength = sum(chk)
626 sumlength = sum(chk)
627 ncols = len(chk)
627 ncols = len(chk)
628 if sumlength+separator_size*(ncols-1) <= displaywidth :
628 if sumlength+separator_size*(ncols-1) <= displaywidth :
629 break;
629 break;
630 return {'columns_numbers' : ncols,
630 return {'columns_numbers' : ncols,
631 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
631 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
632 'rows_numbers' : nrow,
632 'rows_numbers' : nrow,
633 'columns_width' : chk
633 'columns_width' : chk
634 }
634 }
635
635
636
636
637 def _get_or_default(mylist, i, default=None):
637 def _get_or_default(mylist, i, default=None):
638 """return list item number, or default if don't exist"""
638 """return list item number, or default if don't exist"""
639 if i >= len(mylist):
639 if i >= len(mylist):
640 return default
640 return default
641 else :
641 else :
642 return mylist[i]
642 return mylist[i]
643
643
644
644
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items :
        list of strings to columnize
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Any extra positional or keyword arguments are forwarded unchanged to
    `_find_optimal`.

    Returns
    -------

    Returns a tuple of (strings_matrix, dict_info)

    strings_matrix :

        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product of
        rows*columns, the last elements of some lists are filled with `empty`.

    dict_info :
        some info to make columnize easier:

        columns_numbers : number of columns
        rows_numbers : number of rows
        columns_width : list of the width of each column
        optimal_separator_width : best separator width between columns

    Examples
    --------

    In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
       ...: compute_item_matrix(l,displaywidth=12)
    Out[1]:
        ([['aaa', 'f', 'k'],
          ['b', 'g', 'l'],
          ['cc', 'h', None],
          ['d', 'i', None],
          ['eeeee', 'j', None]],
         {'columns_numbers': 3,
          'columns_width': [5, 1, 1],
          'optimal_separator_width': 2,
          'rows_numbers': 5})

    """
    # A real list (not a lazy ``map`` object) so that the layout search can
    # take its length and slice it on Python 3 as well as Python 2.
    widths = [len(item) for item in items]
    info = _find_optimal(widths, *args, **kwargs)
    nrow, ncol = info['rows_numbers'], info['columns_numbers']
    # Items are laid out column by column: the item shown at (row, col) is
    # items[col * nrow + row]; cells past the end of `items` get `empty`.
    matrix = [[_get_or_default(items, col * nrow + row, default=empty)
               for col in range(ncol)]
              for row in range(nrow)]
    return (matrix, info)
701
701
702
702
def columnize(items, separator='  ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string.  It always ends with a newline; for empty `items`
    it is a single newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, separator_size=len(separator),
                                       displaywidth=displaywidth)
    widths = info['columns_width']
    lines = []
    for row in matrix:
        # Drop the falsy cells of the row: this removes the ``None`` padding
        # of incomplete rows (and, as a side effect, empty strings).
        cells = [cell for cell in row if cell]
        # Left-justify every cell to the width of the column it lands in.
        lines.append(separator.join([cell.ljust(width, ' ')
                                     for cell, width in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
727
727
728
728
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    Parameters
    ----------
    list_ : sequence of strings
        The items to enumerate.
    last_sep : str, optional [default is ' and ']
        Separator placed between the last two items.
    sep : str, optional [default is ', ']
        Separator placed between all the other items.
    wrap_item_with : str, optional [default is '']
        If not empty, string added before and after every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not list_:
        return ''
    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with)
                 for item in list_]
    if len(list_) == 1:
        # A lone item is returned as is, without any separator.
        return list_[0]
    return '%s%s%s' % (sep.join(list_[:-1]), last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now