##// END OF EJS Templates
don't skip doctest
Paul Ivanov -
Show More
@@ -1,782 +1,779 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 import os
11 import os
12 import re
12 import re
13 import sys
13 import sys
14 import textwrap
14 import textwrap
15 from string import Formatter
15 from string import Formatter
16 try:
16 try:
17 from pathlib import Path
17 from pathlib import Path
18 except ImportError:
18 except ImportError:
19 # Python 2 backport
19 # Python 2 backport
20 from pathlib2 import Path
20 from pathlib2 import Path
21
21
22 from IPython.testing.skipdoctest import skip_doctest
23 from IPython.utils import py3compat
22 from IPython.utils import py3compat
24
23
# datetime.strftime date format for ipython.
# win32 gets the plain "%d" day directive; every other platform uses "%-d".
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
30
29
class LSString(str):
    """String subclass exposing a few shell-oriented views of its value.

    Instances behave like ordinary strings and add these read-only
    attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with every newline replaced by a space.
        .p (or .paths): list of Path objects, one for each line that names
                        an existing filesystem entry.

    Values that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on newline characters (cached)."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces (cached)."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself, unchanged."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the lines that exist on disk (cached)."""
        try:
            found = self.__paths
        except AttributeError:
            # Lines that do not name an existing path are silently dropped.
            existing = filter(os.path.exists, self.split('\n'))
            found = self.__paths = [Path(entry) for entry in existing]
        return found

    p = paths = property(get_paths)
78
77
79 # FIXME: We need to reimplement type specific displayhook and then add this
78 # FIXME: We need to reimplement type specific displayhook and then add this
80 # back as a custom printer. This should also be moved outside utils into the
79 # back as a custom printer. This should also be moved outside utils into the
81 # core.
80 # core.
82
81
83 # def print_lsstring(arg):
82 # def print_lsstring(arg):
84 # """ Prettier (non-repr-like) and more informative printer for LSString """
83 # """ Prettier (non-repr-like) and more informative printer for LSString """
85 # print "LSString (.p, .n, .l, .s available). Value:"
84 # print "LSString (.p, .n, .l, .s available). Value:"
86 # print arg
85 # print arg
87 #
86 #
88 #
87 #
89 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
88 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
90
89
91
90
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of Path objects, one for each item that names an
      existing filesystem entry.

    Any values which require transformations are computed only once and
    cached.  Nothing in this class invalidates those caches, so they reflect
    the list contents at the time of the first access."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched as a case-insensitive regex; any other
        object is called as a predicate on the match target.  If prune is
        true, return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field (a missing field is matched as "").

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Whole line unless a field index was requested.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line that yields none of the requested
        fields contributes nothing to the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the receiver is left untouched.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        With ``nums`` true the sort key is the integer formed by the digit
        characters of the line (or of the selected field); keys without any
        digits count as 0.
        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                # With a field the key is an SList of field strings, not a
                # str.  Flatten it so the digit scan below walks characters
                # in both cases; iterating the SList directly would test
                # whole fields with isdigit() and turn e.g. "10k" into 0.
                key_text = entry[0] if field is None else "".join(entry[0])
                numstr = "".join([ch for ch in key_text if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])
236
235
237
236
238 # FIXME: We need to reimplement type specific displayhook and then add this
237 # FIXME: We need to reimplement type specific displayhook and then add this
239 # back as a custom printer. This should also be moved outside utils into the
238 # back as a custom printer. This should also be moved outside utils into the
240 # core.
239 # core.
241
240
242 # def print_slist(arg):
241 # def print_slist(arg):
243 # """ Prettier (non-repr-like) and more informative printer for SList """
242 # """ Prettier (non-repr-like) and more informative printer for SList """
244 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
243 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
245 # if hasattr(arg, 'hideonce') and arg.hideonce:
244 # if hasattr(arg, 'hideonce') and arg.hideonce:
246 # arg.hideonce = False
245 # arg.hideonce = False
247 # return
246 # return
248 #
247 #
249 # nlprint(arg) # This was a nested list printer, now removed.
248 # nlprint(arg) # This was a nested list printer, now removed.
250 #
249 #
251 # print_slist = result_display.when_type(SList)(print_slist)
250 # print_slist = result_display.when_type(SList)(print_slist)
252
251
253
252
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # A trailing newline makes '^' match once more at the very end, leaving a
    # dangling indent there; drop it.  The regex only inserts indentation
    # after '\n', so that is what we look for (this also covers '\r\n').
    # Guard against an empty indent: outstr[:-0] would be the empty string.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
291
290
292
291
def list_strings(arg):
    """Wrap a lone string in a list; pass anything else through unchanged.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    # Non-string arguments are returned as-is (not copied or validated).
    return [arg] if isinstance(arg, py3compat.string_types) else arg
313
312
314
313
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is `mark` repeated and cut to `width`
    characters.  Otherwise the text is framed by the same number of marks on
    each side, separated from it by one space.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Room left after the text and its two padding spaces, split evenly
    # between both sides; never negative when the text is wider than `width`.
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return '%s %s %s' % (border, txt, border)
338
337
339
338
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the length of the leading whitespace run of a string.

    Any character matched by the regex class ``\\s`` counts, not only the
    space character; a string with no leading whitespace gives 0.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
350
349
351
350
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: a backslash sitting at the
    end of a line (the paragraph-continue marker) is deleted.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
360
359
361
360
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
        is a bar
    '''

    For use in wrap_paragraphs.
    """
    head, newline, tail = text.partition('\n')
    # Single-line text, or text whose first line is blank: nothing special
    # about the first line, so dedent the whole thing in one go.
    if not newline or not head:
        return textwrap.dedent(text)

    # Leave the first line alone and dedent everything after it.
    return head + '\n' + textwrap.dedent(tail)
387
386
388
387
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    The text is dedented and stripped, split into paragraphs at empty
    (whitespace-only) lines, and each paragraph is re-filled with
    textwrap.fill.  A paragraph that still contains indented lines after
    dedenting is assumed to be deliberately formatted and is left as is.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    blank_run = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_line = re.compile(r'\n\s+', re.MULTILINE)

    pieces = blank_run.split(dedent(text).strip())
    wrapped = []
    # split() interleaves the captured separator group with the paragraphs,
    # so the paragraphs themselves sit at the even indices.
    for para in pieces[::2]:
        if indented_line.search(para) is None:
            wrapped.append(textwrap.fill(para, ncols))
        else:
            wrapped.append(para)
    return wrapped
413
412
414
413
def long_substr(data):
    """Return the longest common substring in a list of strings.

    An empty list gives '', a single-item list gives that item.  Candidates
    are taken from the first string; among equally long common substrings
    the one found first (leftmost start in the first string) wins.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]

    best = ''
    if len(data) > 1 and len(data[0]) > 0:
        reference = data[0]
        size = len(reference)
        for start in range(size):
            for length in range(size - start + 1):
                candidate = reference[start:start + length]
                # Only strictly longer candidates can replace the current best.
                if length > len(best) and all(candidate in other for other in data):
                    best = candidate
    return best
429
428
430
429
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* at the start of all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the common quote prefix removed from every line.
        When something is stripped the lines are re-joined with '\\n'; when
        nothing is stripped the input is returned unchanged.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    quote_prefixes = []
    for line in lines:
        found = re.match(r'^(\s*>[ >]*)', line)
        if found is None:
            # One unquoted line is enough to leave the whole text untouched.
            return text
        quote_prefixes.append(found.group(1))

    # Use the character-wise common *prefix* of the per-line quote runs.  A
    # longest-common-substring would also accept a run that merely occurs
    # somewhere inside each prefix, which strips characters that are not
    # shared when lines differ in leading whitespace.
    common = os.path.commonprefix(quote_prefixes)
    if common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
477
476
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ESC ``[`` <digits and semicolons> ``m`` are
    removed; everything else is returned untouched.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    ansi_code = re.compile(r'\033\[(\d|;)+?m')
    return ansi_code.sub('', source)
488
487
489
488
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression with the
    keyword arguments as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field text and return ``(value, name)``.

        `args` is ignored; positional arguments are not part of the
        evaluation namespace.
        """
        # NOTE: eval() runs arbitrary expressions -- only use this formatter
        # with trusted template strings.
        return eval(name, kwargs), name
514
513
515 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
514 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
516 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
515 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
517 # above, it should be possible to remove FullEvalFormatter.
516 # above, it should be possible to remove FullEvalFormatter.
518
517
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression in the
    kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def vformat(self, format_string, args, kwargs):
        """Render `format_string`, evaluating each field in `kwargs`.

        `args` is accepted for interface compatibility but not used.
        """
        pieces = []
        for literal, expr, spec, conversion in self.parse(format_string):

            # literal text preceding the field is copied through verbatim
            if literal:
                pieces.append(literal)

            # no field on this chunk: nothing left to do
            if expr is None:
                continue

            if spec:
                # What the parser took for a format spec is really the rest
                # of a slice expression: glue it back on after the colon.
                expr = expr + ':' + spec

            # NOTE: eval() runs arbitrary expressions -- templates must be
            # trusted.
            value = self.convert_field(eval(expr, kwargs), conversion)

            # always format with an empty spec (see above)
            pieces.append(self.format_field(value, ''))

        return u''.join(py3compat.cast_unicode(s) for s in pieces)
574
572
575
573
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign (``$$foo``) is an escape and yields a literal
    ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    # Raw string: "\$", "\w" and "\." are not valid string escapes, so the
    # non-raw spelling triggers invalid-escape warnings at compile time.
    # Group 1 is the text before the dollar sign, group 2 the dotted name
    # (with a leading "$" when the dollar sign was doubled).
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")
    def parse(self, fmt_string):
        """Yield (literal_text, field_name, format_spec, conversion) tuples.

        Wraps Formatter.parse: every ``$name`` found in a literal chunk is
        emitted as an extra field of its own, then the original ``{...}``
        field is re-emitted with whatever literal text remains.
        """
        for literal_txt, field_name, format_spec, conversion \
                    in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    # escaped: keep it as literal text for the next yield
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
616
613
617 #-----------------------------------------------------------------------------
614 #-----------------------------------------------------------------------------
618 # Utils to columnize a list of string
615 # Utils to columnize a list of string
619 #-----------------------------------------------------------------------------
616 #-----------------------------------------------------------------------------
620
617
def _col_chunks(l, max_rows, row_first=False):
    """Yield the columns of ``l`` one by one, each with at most ``max_rows`` items.

    When ``row_first`` is false, the columns are consecutive slices of ``l``.
    When it is true, the items are laid out row by row, so each column takes
    every ``ncols``-th item, where ``ncols`` is the number of columns needed
    to fit ``len(l)`` items into ``max_rows`` rows.
    """
    total = len(l)
    if not row_first:
        for start in py3compat.xrange(0, total, max_rows):
            yield l[start:start + max_rows]
        return

    # Ceiling division: a partial last row still needs its own column slot.
    full, leftover = divmod(total, max_rows)
    ncols = full + (1 if leftover > 0 else 0)
    for first in py3compat.xrange(ncols):
        yield [l[idx] for idx in py3compat.xrange(first, total, ncols)]
630
627
631
628
def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
    """Calculate optimal info to columnize a list of strings.

    Tries 1, 2, 3, ... rows and keeps the first row count whose columns,
    plus the separators between them, fit within ``displaywidth``. If no
    row count fits, the last one tried (``len(rlist)`` rows) is used.

    Parameters
    ----------
    rlist : list of int
        Lengths of the items to lay out.
    row_first : bool (default False)
        Passed through to `_col_chunks` to choose the layout order.
    separator_size : int (default=2)
        Number of characters between two adjacent columns.
    displaywidth : int (default=80)
        Width available for the whole layout.

    Returns
    -------
    dict
        With the keys ``num_columns``, ``optimal_separator_width``,
        ``max_rows`` and ``column_widths``. For an empty ``rlist`` the
        counts are 0 and ``column_widths`` is an empty list.
    """
    if len(rlist) == 0:
        # Nothing to lay out: the loop below would never bind its variables.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
                }

    for max_rows in range(1, len(rlist) + 1):
        col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    # Floor division keeps the width a whole number of characters; a single
    # column has no separator at all.
    if ncols > 1:
        separator_width = (displaywidth - sumlength) // (ncols - 1)
    else:
        separator_width = 0

    return {'num_columns': ncols,
            'optimal_separator_width': separator_width,
            'max_rows': max_rows,
            'column_widths': col_widths
            }
645
642
646
643
def _get_or_default(mylist, i, default=None):
    """Return ``mylist[i]``, or ``default`` when ``i`` is at or past the end of ``mylist``."""
    return default if i >= len(mylist) else mylist[i]
653
650
654
651
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Arrange ``items`` into a matrix of rows and report the layout used.

    Parameters
    ----------

    items
        list of strings to columnize
    row_first : (default False)
        Whether to fill the matrix row by row instead of column by column
        (the default).
    empty : (default None)
        value used to pad the matrix when there are fewer items than cells
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
        Forwarded to `_find_optimal`.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit.
        Forwarded to `_find_optimal`.

    Returns
    -------

    strings_matrix

        nested list of strings: the outer list holds one list per row, and
        each inner list has one element per column. If the number of
        elements in `items` does not equal rows*columns, the trailing cells
        of some rows are filled with `empty`.

    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l, displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
            ['b', 'g', 'l'],
            ['cc', 'h', None],
            ['d', 'i', None],
            ['eeeee', 'j', None]],
            {'num_columns': 3,
            'column_widths': [5, 1, 1],
            'optimal_separator_width': 2,
            'max_rows': 5})
    """
    lengths = [len(item) for item in items]
    info = _find_optimal(lengths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    # Map a (row, column) cell to its position in the flat ``items`` list.
    if row_first:
        def flat_index(r, c):
            return r * ncol + c
    else:
        def flat_index(r, c):
            return c * nrow + r

    matrix = [
        [_get_or_default(items, flat_index(r, c), default=empty) for c in range(ncol)]
        for r in range(nrow)
    ]
    return (matrix, info)
718
715
719
716
def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        If true, the separator is padded on the right with spaces up to the
        ``optimal_separator_width`` reported by `compute_item_matrix`.

    Returns
    -------
    The formatted string, one line per row, each ending with a newline.
    Empty ``items`` gives a single newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first,
                                       separator_size=len(separator),
                                       displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    widths = info['column_widths']

    lines = []
    for row in matrix:
        # Drop only the None padding of incomplete rows. A genuine
        # empty-string item keeps its cell so later columns stay aligned.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join([cell.ljust(width, ' ')
                                     for cell, width in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
750
747
751
748
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Join the items of ``list_`` into a natural-language enumeration.

    All items but the last are joined with ``sep``; the last item is
    attached with ``last_sep``. If ``wrap_item_with`` is non-empty, it is
    placed before and after every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not len(list_):
        return ''

    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                 for entry in list_]

    # A lone item is returned as-is, without any separator.
    if len(list_) == 1:
        return list_[0]

    leading = sep.join(list_[:-1])
    return '%s%s%s' % (leading, last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now