##// END OF EJS Templates
doc parsing issue
Matthias Bussonnier -
Show More
@@ -1,752 +1,752 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 import os
11 import os
12 import re
12 import re
13 import string
13 import string
14 import sys
14 import sys
15 import textwrap
15 import textwrap
16 from string import Formatter
16 from string import Formatter
17 from pathlib import Path
17 from pathlib import Path
18
18
19
19
20 # datetime.strftime date format for ipython
20 # datetime.strftime date format for ipython
21 if sys.platform == 'win32':
21 if sys.platform == 'win32':
22 date_format = "%B %d, %Y"
22 date_format = "%B %d, %Y"
23 else:
23 else:
24 date_format = "%B %-d, %Y"
24 date_format = "%B %-d, %Y"
25
25
26 class LSString(str):
26 class LSString(str):
27 """String derivative with a special access attributes.
27 """String derivative with a special access attributes.
28
28
29 These are normal strings, but with the special attributes:
29 These are normal strings, but with the special attributes:
30
30
31 .l (or .list) : value as list (split on newlines).
31 .l (or .list) : value as list (split on newlines).
32 .n (or .nlstr): original value (the string itself).
32 .n (or .nlstr): original value (the string itself).
33 .s (or .spstr): value as whitespace-separated string.
33 .s (or .spstr): value as whitespace-separated string.
34 .p (or .paths): list of path objects (requires path.py package)
34 .p (or .paths): list of path objects (requires path.py package)
35
35
36 Any values which require transformations are computed only once and
36 Any values which require transformations are computed only once and
37 cached.
37 cached.
38
38
39 Such strings are very useful to efficiently interact with the shell, which
39 Such strings are very useful to efficiently interact with the shell, which
40 typically only understands whitespace-separated options for commands."""
40 typically only understands whitespace-separated options for commands."""
41
41
42 def get_list(self):
42 def get_list(self):
43 try:
43 try:
44 return self.__list
44 return self.__list
45 except AttributeError:
45 except AttributeError:
46 self.__list = self.split('\n')
46 self.__list = self.split('\n')
47 return self.__list
47 return self.__list
48
48
49 l = list = property(get_list)
49 l = list = property(get_list)
50
50
51 def get_spstr(self):
51 def get_spstr(self):
52 try:
52 try:
53 return self.__spstr
53 return self.__spstr
54 except AttributeError:
54 except AttributeError:
55 self.__spstr = self.replace('\n',' ')
55 self.__spstr = self.replace('\n',' ')
56 return self.__spstr
56 return self.__spstr
57
57
58 s = spstr = property(get_spstr)
58 s = spstr = property(get_spstr)
59
59
60 def get_nlstr(self):
60 def get_nlstr(self):
61 return self
61 return self
62
62
63 n = nlstr = property(get_nlstr)
63 n = nlstr = property(get_nlstr)
64
64
65 def get_paths(self):
65 def get_paths(self):
66 try:
66 try:
67 return self.__paths
67 return self.__paths
68 except AttributeError:
68 except AttributeError:
69 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]
69 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]
70 return self.__paths
70 return self.__paths
71
71
72 p = paths = property(get_paths)
72 p = paths = property(get_paths)
73
73
74 # FIXME: We need to reimplement type specific displayhook and then add this
74 # FIXME: We need to reimplement type specific displayhook and then add this
75 # back as a custom printer. This should also be moved outside utils into the
75 # back as a custom printer. This should also be moved outside utils into the
76 # core.
76 # core.
77
77
78 # def print_lsstring(arg):
78 # def print_lsstring(arg):
79 # """ Prettier (non-repr-like) and more informative printer for LSString """
79 # """ Prettier (non-repr-like) and more informative printer for LSString """
80 # print "LSString (.p, .n, .l, .s available). Value:"
80 # print "LSString (.p, .n, .l, .s available). Value:"
81 # print arg
81 # print arg
82 #
82 #
83 #
83 #
84 # print_lsstring = result_display.register(LSString)(print_lsstring)
84 # print_lsstring = result_display.register(LSString)(print_lsstring)
85
85
86
86
87 class SList(list):
87 class SList(list):
88 """List derivative with a special access attributes.
88 """List derivative with a special access attributes.
89
89
90 These are normal lists, but with the special attributes:
90 These are normal lists, but with the special attributes:
91
91
92 * .l (or .list) : value as list (the list itself).
92 * .l (or .list) : value as list (the list itself).
93 * .n (or .nlstr): value as a string, joined on newlines.
93 * .n (or .nlstr): value as a string, joined on newlines.
94 * .s (or .spstr): value as a string, joined on spaces.
94 * .s (or .spstr): value as a string, joined on spaces.
95 * .p (or .paths): list of path objects (requires path.py package)
95 * .p (or .paths): list of path objects (requires path.py package)
96
96
97 Any values which require transformations are computed only once and
97 Any values which require transformations are computed only once and
98 cached."""
98 cached."""
99
99
100 def get_list(self):
100 def get_list(self):
101 return self
101 return self
102
102
103 l = list = property(get_list)
103 l = list = property(get_list)
104
104
105 def get_spstr(self):
105 def get_spstr(self):
106 try:
106 try:
107 return self.__spstr
107 return self.__spstr
108 except AttributeError:
108 except AttributeError:
109 self.__spstr = ' '.join(self)
109 self.__spstr = ' '.join(self)
110 return self.__spstr
110 return self.__spstr
111
111
112 s = spstr = property(get_spstr)
112 s = spstr = property(get_spstr)
113
113
114 def get_nlstr(self):
114 def get_nlstr(self):
115 try:
115 try:
116 return self.__nlstr
116 return self.__nlstr
117 except AttributeError:
117 except AttributeError:
118 self.__nlstr = '\n'.join(self)
118 self.__nlstr = '\n'.join(self)
119 return self.__nlstr
119 return self.__nlstr
120
120
121 n = nlstr = property(get_nlstr)
121 n = nlstr = property(get_nlstr)
122
122
123 def get_paths(self):
123 def get_paths(self):
124 try:
124 try:
125 return self.__paths
125 return self.__paths
126 except AttributeError:
126 except AttributeError:
127 self.__paths = [Path(p) for p in self if os.path.exists(p)]
127 self.__paths = [Path(p) for p in self if os.path.exists(p)]
128 return self.__paths
128 return self.__paths
129
129
130 p = paths = property(get_paths)
130 p = paths = property(get_paths)
131
131
132 def grep(self, pattern, prune = False, field = None):
132 def grep(self, pattern, prune = False, field = None):
133 """ Return all strings matching 'pattern' (a regex or callable)
133 """ Return all strings matching 'pattern' (a regex or callable)
134
134
135 This is case-insensitive. If prune is true, return all items
135 This is case-insensitive. If prune is true, return all items
136 NOT matching the pattern.
136 NOT matching the pattern.
137
137
138 If field is specified, the match must occur in the specified
138 If field is specified, the match must occur in the specified
139 whitespace-separated field.
139 whitespace-separated field.
140
140
141 Examples::
141 Examples::
142
142
143 a.grep( lambda x: x.startswith('C') )
143 a.grep( lambda x: x.startswith('C') )
144 a.grep('Cha.*log', prune=1)
144 a.grep('Cha.*log', prune=1)
145 a.grep('chm', field=-1)
145 a.grep('chm', field=-1)
146 """
146 """
147
147
148 def match_target(s):
148 def match_target(s):
149 if field is None:
149 if field is None:
150 return s
150 return s
151 parts = s.split()
151 parts = s.split()
152 try:
152 try:
153 tgt = parts[field]
153 tgt = parts[field]
154 return tgt
154 return tgt
155 except IndexError:
155 except IndexError:
156 return ""
156 return ""
157
157
158 if isinstance(pattern, str):
158 if isinstance(pattern, str):
159 pred = lambda x : re.search(pattern, x, re.IGNORECASE)
159 pred = lambda x : re.search(pattern, x, re.IGNORECASE)
160 else:
160 else:
161 pred = pattern
161 pred = pattern
162 if not prune:
162 if not prune:
163 return SList([el for el in self if pred(match_target(el))])
163 return SList([el for el in self if pred(match_target(el))])
164 else:
164 else:
165 return SList([el for el in self if not pred(match_target(el))])
165 return SList([el for el in self if not pred(match_target(el))])
166
166
167 def fields(self, *fields):
167 def fields(self, *fields):
168 """ Collect whitespace-separated fields from string list
168 """ Collect whitespace-separated fields from string list
169
169
170 Allows quick awk-like usage of string lists.
170 Allows quick awk-like usage of string lists.
171
171
172 Example data (in var a, created by 'a = !ls -l')::
172 Example data (in var a, created by 'a = !ls -l')::
173
173
174 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
174 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
175 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython
175 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython
176
176
177 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
177 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
178 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
178 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
179 (note the joining by space).
179 (note the joining by space).
180 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``
180 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``
181
181
182 IndexErrors are ignored.
182 IndexErrors are ignored.
183
183
184 Without args, fields() just split()'s the strings.
184 Without args, fields() just split()'s the strings.
185 """
185 """
186 if len(fields) == 0:
186 if len(fields) == 0:
187 return [el.split() for el in self]
187 return [el.split() for el in self]
188
188
189 res = SList()
189 res = SList()
190 for el in [f.split() for f in self]:
190 for el in [f.split() for f in self]:
191 lineparts = []
191 lineparts = []
192
192
193 for fd in fields:
193 for fd in fields:
194 try:
194 try:
195 lineparts.append(el[fd])
195 lineparts.append(el[fd])
196 except IndexError:
196 except IndexError:
197 pass
197 pass
198 if lineparts:
198 if lineparts:
199 res.append(" ".join(lineparts))
199 res.append(" ".join(lineparts))
200
200
201 return res
201 return res
202
202
203 def sort(self,field= None, nums = False):
203 def sort(self,field= None, nums = False):
204 """ sort by specified fields (see fields())
204 """ sort by specified fields (see fields())
205
205
206 Example::
206 Example::
207
207
208 a.sort(1, nums = True)
208 a.sort(1, nums = True)
209
209
210 Sorts a by second field, in numerical order (so that 21 > 3)
210 Sorts a by second field, in numerical order (so that 21 > 3)
211
211
212 """
212 """
213
213
214 #decorate, sort, undecorate
214 #decorate, sort, undecorate
215 if field is not None:
215 if field is not None:
216 dsu = [[SList([line]).fields(field), line] for line in self]
216 dsu = [[SList([line]).fields(field), line] for line in self]
217 else:
217 else:
218 dsu = [[line, line] for line in self]
218 dsu = [[line, line] for line in self]
219 if nums:
219 if nums:
220 for i in range(len(dsu)):
220 for i in range(len(dsu)):
221 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
221 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
222 try:
222 try:
223 n = int(numstr)
223 n = int(numstr)
224 except ValueError:
224 except ValueError:
225 n = 0
225 n = 0
226 dsu[i][0] = n
226 dsu[i][0] = n
227
227
228
228
229 dsu.sort()
229 dsu.sort()
230 return SList([t[1] for t in dsu])
230 return SList([t[1] for t in dsu])
231
231
232
232
233 # FIXME: We need to reimplement type specific displayhook and then add this
233 # FIXME: We need to reimplement type specific displayhook and then add this
234 # back as a custom printer. This should also be moved outside utils into the
234 # back as a custom printer. This should also be moved outside utils into the
235 # core.
235 # core.
236
236
237 # def print_slist(arg):
237 # def print_slist(arg):
238 # """ Prettier (non-repr-like) and more informative printer for SList """
238 # """ Prettier (non-repr-like) and more informative printer for SList """
239 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
239 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240 # if hasattr(arg, 'hideonce') and arg.hideonce:
240 # if hasattr(arg, 'hideonce') and arg.hideonce:
241 # arg.hideonce = False
241 # arg.hideonce = False
242 # return
242 # return
243 #
243 #
244 # nlprint(arg) # This was a nested list printer, now removed.
244 # nlprint(arg) # This was a nested list printer, now removed.
245 #
245 #
246 # print_slist = result_display.register(SList)(print_slist)
246 # print_slist = result_display.register(SList)(print_slist)
247
247
248
248
249 def indent(instr,nspaces=4, ntabs=0, flatten=False):
249 def indent(instr,nspaces=4, ntabs=0, flatten=False):
250 """Indent a string a given number of spaces or tabstops.
250 """Indent a string a given number of spaces or tabstops.
251
251
252 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
252 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
253
253
254 Parameters
254 Parameters
255 ----------
255 ----------
256 instr : basestring
256 instr : basestring
257 The string to be indented.
257 The string to be indented.
258 nspaces : int (default: 4)
258 nspaces : int (default: 4)
259 The number of spaces to be indented.
259 The number of spaces to be indented.
260 ntabs : int (default: 0)
260 ntabs : int (default: 0)
261 The number of tabs to be indented.
261 The number of tabs to be indented.
262 flatten : bool (default: False)
262 flatten : bool (default: False)
263 Whether to scrub existing indentation. If True, all lines will be
263 Whether to scrub existing indentation. If True, all lines will be
264 aligned to the same indentation. If False, existing indentation will
264 aligned to the same indentation. If False, existing indentation will
265 be strictly increased.
265 be strictly increased.
266
266
267 Returns
267 Returns
268 -------
268 -------
269 str|unicode : string indented by ntabs and nspaces.
269 str|unicode : string indented by ntabs and nspaces.
270
270
271 """
271 """
272 if instr is None:
272 if instr is None:
273 return
273 return
274 ind = '\t'*ntabs+' '*nspaces
274 ind = '\t'*ntabs+' '*nspaces
275 if flatten:
275 if flatten:
276 pat = re.compile(r'^\s*', re.MULTILINE)
276 pat = re.compile(r'^\s*', re.MULTILINE)
277 else:
277 else:
278 pat = re.compile(r'^', re.MULTILINE)
278 pat = re.compile(r'^', re.MULTILINE)
279 outstr = re.sub(pat, ind, instr)
279 outstr = re.sub(pat, ind, instr)
280 if outstr.endswith(os.linesep+ind):
280 if outstr.endswith(os.linesep+ind):
281 return outstr[:-len(ind)]
281 return outstr[:-len(ind)]
282 else:
282 else:
283 return outstr
283 return outstr
284
284
285
285
286 def list_strings(arg):
286 def list_strings(arg):
287 """Always return a list of strings, given a string or list of strings
287 """Always return a list of strings, given a string or list of strings
288 as input.
288 as input.
289
289
290 Examples
290 Examples
291 --------
291 --------
292 ::
292 ::
293
293
294 In [7]: list_strings('A single string')
294 In [7]: list_strings('A single string')
295 Out[7]: ['A single string']
295 Out[7]: ['A single string']
296
296
297 In [8]: list_strings(['A single string in a list'])
297 In [8]: list_strings(['A single string in a list'])
298 Out[8]: ['A single string in a list']
298 Out[8]: ['A single string in a list']
299
299
300 In [9]: list_strings(['A','list','of','strings'])
300 In [9]: list_strings(['A','list','of','strings'])
301 Out[9]: ['A', 'list', 'of', 'strings']
301 Out[9]: ['A', 'list', 'of', 'strings']
302 """
302 """
303
303
304 if isinstance(arg, str):
304 if isinstance(arg, str):
305 return [arg]
305 return [arg]
306 else:
306 else:
307 return arg
307 return arg
308
308
309
309
310 def marquee(txt='',width=78,mark='*'):
310 def marquee(txt='',width=78,mark='*'):
311 """Return the input string centered in a 'marquee'.
311 """Return the input string centered in a 'marquee'.
312
312
313 Examples
313 Examples
314 --------
314 --------
315 ::
315 ::
316
316
317 In [16]: marquee('A test',40)
317 In [16]: marquee('A test',40)
318 Out[16]: '**************** A test ****************'
318 Out[16]: '**************** A test ****************'
319
319
320 In [17]: marquee('A test',40,'-')
320 In [17]: marquee('A test',40,'-')
321 Out[17]: '---------------- A test ----------------'
321 Out[17]: '---------------- A test ----------------'
322
322
323 In [18]: marquee('A test',40,' ')
323 In [18]: marquee('A test',40,' ')
324 Out[18]: ' A test '
324 Out[18]: ' A test '
325
325
326 """
326 """
327 if not txt:
327 if not txt:
328 return (mark*width)[:width]
328 return (mark*width)[:width]
329 nmark = (width-len(txt)-2)//len(mark)//2
329 nmark = (width-len(txt)-2)//len(mark)//2
330 if nmark < 0: nmark =0
330 if nmark < 0: nmark =0
331 marks = mark*nmark
331 marks = mark*nmark
332 return '%s %s %s' % (marks,txt,marks)
332 return '%s %s %s' % (marks,txt,marks)
333
333
334
334
335 ini_spaces_re = re.compile(r'^(\s+)')
335 ini_spaces_re = re.compile(r'^(\s+)')
336
336
337 def num_ini_spaces(strng):
337 def num_ini_spaces(strng):
338 """Return the number of initial spaces in a string"""
338 """Return the number of initial spaces in a string"""
339
339
340 ini_spaces = ini_spaces_re.match(strng)
340 ini_spaces = ini_spaces_re.match(strng)
341 if ini_spaces:
341 if ini_spaces:
342 return ini_spaces.end()
342 return ini_spaces.end()
343 else:
343 else:
344 return 0
344 return 0
345
345
346
346
347 def format_screen(strng):
347 def format_screen(strng):
348 """Format a string for screen printing.
348 """Format a string for screen printing.
349
349
350 This removes some latex-type format codes."""
350 This removes some latex-type format codes."""
351 # Paragraph continue
351 # Paragraph continue
352 par_re = re.compile(r'\\$',re.MULTILINE)
352 par_re = re.compile(r'\\$',re.MULTILINE)
353 strng = par_re.sub('',strng)
353 strng = par_re.sub('',strng)
354 return strng
354 return strng
355
355
356
356
357 def dedent(text):
357 def dedent(text):
358 """Equivalent of textwrap.dedent that ignores unindented first line.
358 """Equivalent of textwrap.dedent that ignores unindented first line.
359
359
360 This means it will still dedent strings like:
360 This means it will still dedent strings like:
361 '''foo
361 '''foo
362 is a bar
362 is a bar
363 '''
363 '''
364
364
365 For use in wrap_paragraphs.
365 For use in wrap_paragraphs.
366 """
366 """
367
367
368 if text.startswith('\n'):
368 if text.startswith('\n'):
369 # text starts with blank line, don't ignore the first line
369 # text starts with blank line, don't ignore the first line
370 return textwrap.dedent(text)
370 return textwrap.dedent(text)
371
371
372 # split first line
372 # split first line
373 splits = text.split('\n',1)
373 splits = text.split('\n',1)
374 if len(splits) == 1:
374 if len(splits) == 1:
375 # only one line
375 # only one line
376 return textwrap.dedent(text)
376 return textwrap.dedent(text)
377
377
378 first, rest = splits
378 first, rest = splits
379 # dedent everything but the first line
379 # dedent everything but the first line
380 rest = textwrap.dedent(rest)
380 rest = textwrap.dedent(rest)
381 return '\n'.join([first, rest])
381 return '\n'.join([first, rest])
382
382
383
383
384 def wrap_paragraphs(text, ncols=80):
384 def wrap_paragraphs(text, ncols=80):
385 """Wrap multiple paragraphs to fit a specified width.
385 """Wrap multiple paragraphs to fit a specified width.
386
386
387 This is equivalent to textwrap.wrap, but with support for multiple
387 This is equivalent to textwrap.wrap, but with support for multiple
388 paragraphs, as separated by empty lines.
388 paragraphs, as separated by empty lines.
389
389
390 Returns
390 Returns
391 -------
391 -------
392 list of complete paragraphs, wrapped to fill `ncols` columns.
392 list of complete paragraphs, wrapped to fill `ncols` columns.
393 """
393 """
394 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
394 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
395 text = dedent(text).strip()
395 text = dedent(text).strip()
396 paragraphs = paragraph_re.split(text)[::2] # every other entry is space
396 paragraphs = paragraph_re.split(text)[::2] # every other entry is space
397 out_ps = []
397 out_ps = []
398 indent_re = re.compile(r'\n\s+', re.MULTILINE)
398 indent_re = re.compile(r'\n\s+', re.MULTILINE)
399 for p in paragraphs:
399 for p in paragraphs:
400 # presume indentation that survives dedent is meaningful formatting,
400 # presume indentation that survives dedent is meaningful formatting,
401 # so don't fill unless text is flush.
401 # so don't fill unless text is flush.
402 if indent_re.search(p) is None:
402 if indent_re.search(p) is None:
403 # wrap paragraph
403 # wrap paragraph
404 p = textwrap.fill(p, ncols)
404 p = textwrap.fill(p, ncols)
405 out_ps.append(p)
405 out_ps.append(p)
406 return out_ps
406 return out_ps
407
407
408
408
409 def strip_email_quotes(text):
409 def strip_email_quotes(text):
410 """Strip leading email quotation characters ('>').
410 """Strip leading email quotation characters ('>').
411
411
412 Removes any combination of leading '>' interspersed with whitespace that
412 Removes any combination of leading '>' interspersed with whitespace that
413 appears *identically* in all lines of the input text.
413 appears *identically* in all lines of the input text.
414
414
415 Parameters
415 Parameters
416 ----------
416 ----------
417 text : str
417 text : str
418
418
419 Examples
419 Examples
420 --------
420 --------
421
421
422 Simple uses::
422 Simple uses::
423
423
424 In [2]: strip_email_quotes('> > text')
424 In [2]: strip_email_quotes('> > text')
425 Out[2]: 'text'
425 Out[2]: 'text'
426
426
427 In [3]: strip_email_quotes('> > text\\n> > more')
427 In [3]: strip_email_quotes('> > text\\n> > more')
428 Out[3]: 'text\\nmore'
428 Out[3]: 'text\\nmore'
429
429
430 Note how only the common prefix that appears in all lines is stripped::
430 Note how only the common prefix that appears in all lines is stripped::
431
431
432 In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
432 In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
433 Out[4]: '> text\\n> more\\nmore...'
433 Out[4]: '> text\\n> more\\nmore...'
434
434
435 So if any line has no quote marks ('>'), then none are stripped from any
435 So if any line has no quote marks ('>'), then none are stripped from any
436 of them ::
436 of them ::
437
437
438 In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
438 In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
439 Out[5]: '> > text\\n> > more\\nlast different'
439 Out[5]: '> > text\\n> > more\\nlast different'
440 """
440 """
441 lines = text.splitlines()
441 lines = text.splitlines()
442 strip_len = 0
442 strip_len = 0
443
443
444 for characters in zip(*lines):
444 for characters in zip(*lines):
445 # Check if all characters in this position are the same
445 # Check if all characters in this position are the same
446 if len(set(characters)) > 1:
446 if len(set(characters)) > 1:
447 break
447 break
448 prefix_char = characters[0]
448 prefix_char = characters[0]
449
449
450 if prefix_char in string.whitespace or prefix_char == ">":
450 if prefix_char in string.whitespace or prefix_char == ">":
451 strip_len += 1
451 strip_len += 1
452 else:
452 else:
453 break
453 break
454
454
455 text = "\n".join([ln[strip_len:] for ln in lines])
455 text = "\n".join([ln[strip_len:] for ln in lines])
456 return text
456 return text
457
457
458
458
459 def strip_ansi(source):
459 def strip_ansi(source):
460 """
460 """
461 Remove ansi escape codes from text.
461 Remove ansi escape codes from text.
462
462
463 Parameters
463 Parameters
464 ----------
464 ----------
465 source : str
465 source : str
466 Source to remove the ansi from
466 Source to remove the ansi from
467 """
467 """
468 return re.sub(r'\033\[(\d|;)+?m', '', source)
468 return re.sub(r'\033\[(\d|;)+?m', '', source)
469
469
470
470
471 class EvalFormatter(Formatter):
471 class EvalFormatter(Formatter):
472 """A String Formatter that allows evaluation of simple expressions.
472 """A String Formatter that allows evaluation of simple expressions.
473
473
474 Note that this version interprets a : as specifying a format string (as per
474 Note that this version interprets a `:` as specifying a format string (as per
475 standard string formatting), so if slicing is required, you must explicitly
475 standard string formatting), so if slicing is required, you must explicitly
476 create a slice.
476 create a slice.
477
477
478 This is to be used in templating cases, such as the parallel batch
478 This is to be used in templating cases, such as the parallel batch
479 script templates, where simple arithmetic on arguments is useful.
479 script templates, where simple arithmetic on arguments is useful.
480
480
481 Examples
481 Examples
482 --------
482 --------
483 ::
483 ::
484
484
485 In [1]: f = EvalFormatter()
485 In [1]: f = EvalFormatter()
486 In [2]: f.format('{n//4}', n=8)
486 In [2]: f.format('{n//4}', n=8)
487 Out[2]: '2'
487 Out[2]: '2'
488
488
489 In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
489 In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
490 Out[3]: 'll'
490 Out[3]: 'll'
491 """
491 """
492 def get_field(self, name, args, kwargs):
492 def get_field(self, name, args, kwargs):
493 v = eval(name, kwargs)
493 v = eval(name, kwargs)
494 return v, name
494 return v, name
495
495
496 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
496 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
497 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
497 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
498 # above, it should be possible to remove FullEvalFormatter.
498 # above, it should be possible to remove FullEvalFormatter.
499
499
500 class FullEvalFormatter(Formatter):
500 class FullEvalFormatter(Formatter):
501 """A String Formatter that allows evaluation of simple expressions.
501 """A String Formatter that allows evaluation of simple expressions.
502
502
503 Any time a format key is not found in the kwargs,
503 Any time a format key is not found in the kwargs,
504 it will be tried as an expression in the kwargs namespace.
504 it will be tried as an expression in the kwargs namespace.
505
505
506 Note that this version allows slicing using [1:2], so you cannot specify
506 Note that this version allows slicing using [1:2], so you cannot specify
507 a format string. Use :class:`EvalFormatter` to permit format strings.
507 a format string. Use :class:`EvalFormatter` to permit format strings.
508
508
509 Examples
509 Examples
510 --------
510 --------
511 ::
511 ::
512
512
513 In [1]: f = FullEvalFormatter()
513 In [1]: f = FullEvalFormatter()
514 In [2]: f.format('{n//4}', n=8)
514 In [2]: f.format('{n//4}', n=8)
515 Out[2]: '2'
515 Out[2]: '2'
516
516
517 In [3]: f.format('{list(range(5))[2:4]}')
517 In [3]: f.format('{list(range(5))[2:4]}')
518 Out[3]: '[2, 3]'
518 Out[3]: '[2, 3]'
519
519
520 In [4]: f.format('{3*2}')
520 In [4]: f.format('{3*2}')
521 Out[4]: '6'
521 Out[4]: '6'
522 """
522 """
523 # copied from Formatter._vformat with minor changes to allow eval
523 # copied from Formatter._vformat with minor changes to allow eval
524 # and replace the format_spec code with slicing
524 # and replace the format_spec code with slicing
525 def vformat(self, format_string:str, args, kwargs)->str:
525 def vformat(self, format_string:str, args, kwargs)->str:
526 result = []
526 result = []
527 for literal_text, field_name, format_spec, conversion in \
527 for literal_text, field_name, format_spec, conversion in \
528 self.parse(format_string):
528 self.parse(format_string):
529
529
530 # output the literal text
530 # output the literal text
531 if literal_text:
531 if literal_text:
532 result.append(literal_text)
532 result.append(literal_text)
533
533
534 # if there's a field, output it
534 # if there's a field, output it
535 if field_name is not None:
535 if field_name is not None:
536 # this is some markup, find the object and do
536 # this is some markup, find the object and do
537 # the formatting
537 # the formatting
538
538
539 if format_spec:
539 if format_spec:
540 # override format spec, to allow slicing:
540 # override format spec, to allow slicing:
541 field_name = ':'.join([field_name, format_spec])
541 field_name = ':'.join([field_name, format_spec])
542
542
543 # eval the contents of the field for the object
543 # eval the contents of the field for the object
544 # to be formatted
544 # to be formatted
545 obj = eval(field_name, kwargs)
545 obj = eval(field_name, kwargs)
546
546
547 # do any conversion on the resulting object
547 # do any conversion on the resulting object
548 obj = self.convert_field(obj, conversion)
548 obj = self.convert_field(obj, conversion)
549
549
550 # format the object and append to the result
550 # format the object and append to the result
551 result.append(self.format_field(obj, ''))
551 result.append(self.format_field(obj, ''))
552
552
553 return ''.join(result)
553 return ''.join(result)
554
554
555
555
556 class DollarFormatter(FullEvalFormatter):
556 class DollarFormatter(FullEvalFormatter):
557 """Formatter allowing Itpl style $foo replacement, for names and attribute
557 """Formatter allowing Itpl style $foo replacement, for names and attribute
558 access only. Standard {foo} replacement also works, and allows full
558 access only. Standard {foo} replacement also works, and allows full
559 evaluation of its arguments.
559 evaluation of its arguments.
560
560
561 Examples
561 Examples
562 --------
562 --------
563 ::
563 ::
564
564
565 In [1]: f = DollarFormatter()
565 In [1]: f = DollarFormatter()
566 In [2]: f.format('{n//4}', n=8)
566 In [2]: f.format('{n//4}', n=8)
567 Out[2]: '2'
567 Out[2]: '2'
568
568
569 In [3]: f.format('23 * 76 is $result', result=23*76)
569 In [3]: f.format('23 * 76 is $result', result=23*76)
570 Out[3]: '23 * 76 is 1748'
570 Out[3]: '23 * 76 is 1748'
571
571
572 In [4]: f.format('$a or {b}', a=1, b=2)
572 In [4]: f.format('$a or {b}', a=1, b=2)
573 Out[4]: '1 or 2'
573 Out[4]: '1 or 2'
574 """
574 """
575 _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")
575 _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")
def parse(self, fmt_string):
    """Parse ``fmt_string``, yielding ``$name`` fields alongside ``{name}`` ones.

    The string is first split by :meth:`string.Formatter.parse`; every
    literal chunk it produces is then scanned with
    ``self._dollar_pattern_ignore_single_quote`` for ``$name`` fields.

    Parameters
    ----------
    fmt_string : str
        The format string to parse.

    Yields
    ------
    tuple
        ``(literal_text, field_name, format_spec, conversion)`` tuples, the
        same shape that :meth:`string.Formatter.parse` produces. Fields
        found through the ``$name`` syntax always carry an empty format
        spec and no conversion. ``$$name`` is an escape: it is emitted as
        the literal text ``$name``.
    """
    find_dollar_fields = self._dollar_pattern_ignore_single_quote.finditer

    for literal_txt, field_name, format_spec, conversion in Formatter.parse(
        self, fmt_string
    ):
        # Find $foo patterns in the literal text.
        continue_from = 0
        txt = ""
        for m in find_dollar_fields(literal_txt):
            # Take the literal prefix by slicing up to the '$' that
            # introduces the field rather than trusting group(1): '.' in the
            # pattern does not match a newline, so the regex search may skip
            # ahead and group(1) would then miss the text before the newline.
            dollar_at = m.start(2) - 1
            new_txt = literal_txt[continue_from:dollar_at]
            new_field = m.group(2)
            # $$foo --> $foo
            if new_field.startswith("$"):
                txt += new_txt + new_field
            else:
                yield (txt + new_txt, new_field, "", None)
                txt = ""
            continue_from = m.end()

        # Re-yield the {foo} style pattern
        yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
595
595
def __repr__(self):
    """Return the fixed, argument-free representation of the formatter."""
    representation = "<DollarFormatter>"
    return representation
598
598
599 #-----------------------------------------------------------------------------
599 #-----------------------------------------------------------------------------
600 # Utils to columnize a list of string
600 # Utils to columnize a list of string
601 #-----------------------------------------------------------------------------
601 #-----------------------------------------------------------------------------
602
602
def _col_chunks(l, max_rows, row_first=False):
    """Yield the columns obtained by laying out ``l`` with ``max_rows`` rows.

    With ``row_first`` false, each column is a consecutive slice of at most
    ``max_rows`` items. With ``row_first`` true, the number of columns is
    the ceiling of ``len(l) / max_rows`` and column ``i`` gathers every
    ``ncols``-th item starting at index ``i``.
    """
    total = len(l)
    if not row_first:
        for start in range(0, total, max_rows):
            yield l[start:(start + max_rows)]
        return

    full_columns, leftover = divmod(total, max_rows)
    ncols = full_columns + (leftover > 0)
    for first in range(ncols):
        yield [l[index] for index in range(first, total, ncols)]
612
612
613
613
def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
    """Calculate optimal info to columnize a list of strings.

    Parameters
    ----------
    rlist : list of int
        Length of every item to lay out.
    row_first : bool
        Passed through to ``_col_chunks`` to select a row-first layout.
    separator_size : int
        Number of characters reserved between two adjacent columns.
    displaywidth : int
        Total width the columns plus separators must fit into.

    Returns
    -------
    dict
        With keys ``num_columns``, ``optimal_separator_width``, ``max_rows``
        and ``column_widths``. The smallest ``max_rows`` whose layout fits
        in ``displaywidth`` is used; if none fits, the values for
        ``max_rows == len(rlist)`` are returned. An empty ``rlist`` gives
        an all-zero layout with no column widths.
    """
    # Without this guard the loop below never runs for an empty list and the
    # names used in the return statement would be unbound.
    if len(rlist) == 0:
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
                }

    for max_rows in range(1, len(rlist) + 1):
        # The width of a column is the length of its longest item.
        col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    # A single column has no gap to spread the remaining width over.
    gaps = ncols - 1
    return {'num_columns': ncols,
            'optimal_separator_width': (displaywidth - sumlength) // gaps if gaps else 0,
            'max_rows': max_rows,
            'column_widths': col_widths
            }
627
627
628
628
def _get_or_default(mylist, i, default=None):
    """Return ``mylist[i]``, or ``default`` when ``i`` is at or past the end."""
    return mylist[i] if i < len(mylist) else default
635
635
636
636
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Arrange ``items`` into a matrix of rows and report the layout used.

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to fill the matrix row by row instead of column by column
        (the default).
    empty : (default None)
        value used to pad the matrix where there is no item left
    separator_size : int (default=2)
        How many characters are used as separation between two columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit.

    Returns
    -------
    strings_matrix
        nested list of strings; the outer list holds one list per row and
        every inner list holds one entry per column. When the number of
        elements in `items` is smaller than rows*columns, the missing
        positions are filled with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
            number of columns
        max_rows
            maximum number of rows (final number may be less)
        column_widths
            list of the width of each column
        optimal_separator_width
            best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: matrix, info = compute_item_matrix(l, displaywidth=12)
        In [3]: matrix
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    item_lengths = [len(item) for item in items]
    info = _find_optimal(item_lengths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    matrix = []
    for r in range(nrow):
        row = []
        for c in range(ncol):
            # Row-first walks the items along each row; the default layout
            # walks them down each column.
            position = r * ncol + c if row_first else c * nrow + r
            row.append(_get_or_default(items, position, default=empty))
        matrix.append(row)
    return (matrix, info)
691
691
692
692
def columnize(items, row_first=False, separator=' ', displaywidth=80, spread=False):
    """Format a list of strings as a single multi-column string.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional
        The string that separates columns.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.
    spread : bool, optional [default is False]
        When true, the separator is right-padded with spaces up to the
        ``optimal_separator_width`` computed for the layout.

    Returns
    -------
    The formatted string, one line per row, each ending with a newline.
    Empty ``items`` gives a single newline.
    """
    if not items:
        return '\n'

    matrix, info = compute_item_matrix(
        items,
        row_first=row_first,
        separator_size=len(separator),
        displaywidth=displaywidth,
    )
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))

    widths = info['column_widths']
    rendered_rows = []
    for row in matrix:
        # Falsy cells (the padding added to short rows) are dropped before
        # each remaining cell is padded to its column width.
        cells = [cell.ljust(width, ' ') for cell, width in zip(filter(None, row), widths)]
        rendered_rows.append(separator.join(cells))
    return '\n'.join(rendered_rows) + '\n'
720
720
721
721
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Join items into a natural-language enumeration.

    All items but the last are joined with ``sep``; the last one is
    attached with ``last_sep``. When ``wrap_item_with`` is non-empty it is
    put on both sides of every item first.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    if wrap_item_with:
        wrapped = []
        for entry in list_:
            wrapped.append('%s%s%s' % (wrap_item_with, entry, wrap_item_with))
        list_ = wrapped

    if len(list_) == 1:
        return list_[0]

    leading = sep.join(list_[:-1])
    final = list_[-1]
    return '%s%s%s' % (leading, last_sep, final)
General Comments 0
You need to be logged in to leave comments. Login now