##// END OF EJS Templates
Remove more unused functions from utils.text
Thomas Kluyver -
Show More
@@ -1,781 +1,717 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import __main__
22 import __main__
23
23
24 import os
24 import os
25 import re
25 import re
26 import sys
26 import sys
27 import textwrap
27 import textwrap
28 from string import Formatter
28 from string import Formatter
29
29
30 from IPython.external.path import path
30 from IPython.external.path import path
31 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
31 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
32 from IPython.utils import py3compat
32 from IPython.utils import py3compat
33 from IPython.utils.data import flatten
33 from IPython.utils.data import flatten
34
34
35 #-----------------------------------------------------------------------------
35 #-----------------------------------------------------------------------------
36 # Code
36 # Code
37 #-----------------------------------------------------------------------------
37 #-----------------------------------------------------------------------------
38
38
def unquote_ends(istr):
    """Remove a single pair of matching quotes from the endpoints of a string.

    Parameters
    ----------
    istr : str
        The string to unquote.  Empty (falsy) input is returned unchanged.

    Returns
    -------
    str
        ``istr`` without its first and last character when both are the same
        quote character (``'`` or ``"``); otherwise ``istr`` itself.
    """
    # A *pair* needs two characters.  A lone quote character is both the
    # first and the last character of the string, and must not be reduced
    # to the empty string.
    if not istr or len(istr) < 2:
        return istr
    first, last = istr[0], istr[-1]
    if first == last and first in ('"', "'"):
        return istr[1:-1]
    return istr
49
50
class LSString(str):
    """String subclass exposing a few alternate views of its value.

    Instances behave like normal strings, with these extra attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are handy when talking to the shell, which typically only
    understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the lines of the string (split on newlines), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines replaced by spaces, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines naming existing paths, cached."""
        try:
            found = self.__paths
        except AttributeError:
            # Only lines for which os.path.exists() is true are kept.
            found = self.__paths = [path(line) for line in self.split('\n')
                                    if os.path.exists(line)]
        return found

    p = paths = property(get_paths)
98
86
99 # FIXME: We need to reimplement type specific displayhook and then add this
87 # FIXME: We need to reimplement type specific displayhook and then add this
100 # back as a custom printer. This should also be moved outside utils into the
88 # back as a custom printer. This should also be moved outside utils into the
101 # core.
89 # core.
102
90
103 # def print_lsstring(arg):
91 # def print_lsstring(arg):
104 # """ Prettier (non-repr-like) and more informative printer for LSString """
92 # """ Prettier (non-repr-like) and more informative printer for LSString """
105 # print "LSString (.p, .n, .l, .s available). Value:"
93 # print "LSString (.p, .n, .l, .s available). Value:"
106 # print arg
94 # print arg
107 #
95 #
108 #
96 #
109 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
97 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
110
98
111
99
class SList(list):
    """List subclass exposing a few alternate views of its value.

    Instances behave like normal lists, with these extra attributes:

        .l (or .list) : value as list (the list itself).
        .n (or .nlstr): value as a string, joined on newlines.
        .s (or .spstr): value as a string, joined on spaces.
        .p (or .paths): list of path objects

    Views that need a transformation are computed on first access and then
    cached on the instance, so they do not reflect later mutations of the
    list."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined on single spaces, cached."""
        try:
            joined = self.__spstr
        except AttributeError:
            joined = self.__spstr = ' '.join(self)
        return joined

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined on newlines, cached."""
        try:
            joined = self.__nlstr
        except AttributeError:
            joined = self.__nlstr = '\n'.join(self)
        return joined

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items naming existing paths, cached."""
        try:
            found = self.__paths
        except AttributeError:
            # Only items for which os.path.exists() is true are kept.
            found = self.__paths = [path(p) for p in self if os.path.exists(p)]
        return found

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively; any other value is
        called as a predicate on the match target.  If prune is true, return
        all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field (a missing field is matched as "").

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Whole line, or just the requested whitespace-separated field.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
        # Python 3, so this works without relying on `basestring`, which is
        # undefined on Python 3.
        if isinstance(pattern, (str, type(u''))):
            pred = lambda x: re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern

        keep_matches = not prune
        return SList([el for el in self
                      if bool(pred(match_target(el))) == keep_matches])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::
            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        a.fields(0) is ['-rwxrwxrwx', 'drwxrwxrwx+']
        a.fields(1,0) is ['1 -rwxrwxrwx', '6 drwxrwxrwx+']
        (note the joining by space).
        a.fields(-1) is ['ChangeLog', 'IPython']

        IndexErrors are ignored; a line contributing no field at all is
        dropped from the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [el.split() for el in self]

        res = SList()
        for parts in [line.split() for line in self]:
            lineparts = []
            for fd in fields:
                try:
                    lineparts.append(parts[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))
        return res

    def sort(self, field=None, nums=False):
        """ sort by specified fields (see fields())

        Unlike list.sort(), this does not sort in place: it returns a new,
        sorted SList.

        Example::
            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        With nums, keys that yield no digits sort as 0.  When a field is
        given, the field is used as the number only if it consists entirely
        of digits; without a field, all digit characters of the line are
        concatenated to form the number.
        """
        # decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]

        if nums:
            for entry in dsu:
                numstr = "".join([ch for ch in entry[0] if ch.isdigit()])
                try:
                    entry[0] = int(numstr)
                except ValueError:
                    entry[0] = 0

        dsu.sort()
        return SList([t[1] for t in dsu])
254
242
255
243
256 # FIXME: We need to reimplement type specific displayhook and then add this
244 # FIXME: We need to reimplement type specific displayhook and then add this
257 # back as a custom printer. This should also be moved outside utils into the
245 # back as a custom printer. This should also be moved outside utils into the
258 # core.
246 # core.
259
247
260 # def print_slist(arg):
248 # def print_slist(arg):
261 # """ Prettier (non-repr-like) and more informative printer for SList """
249 # """ Prettier (non-repr-like) and more informative printer for SList """
262 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
250 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
263 # if hasattr(arg, 'hideonce') and arg.hideonce:
251 # if hasattr(arg, 'hideonce') and arg.hideonce:
264 # arg.hideonce = False
252 # arg.hideonce = False
265 # return
253 # return
266 #
254 #
267 # nlprint(arg) # This was a nested list printer, now removed.
255 # nlprint(arg) # This was a nested list printer, now removed.
268 #
256 #
269 # print_slist = result_display.when_type(SList)(print_slist)
257 # print_slist = result_display.when_type(SList)(print_slist)
270
258
271
259
def esc_quotes(strng):
    """Return a copy of `strng` with every ' and " preceded by a backslash."""
    escaped = strng
    # Double quotes first, then single quotes (same order as before; the two
    # substitutions never interfere with each other).
    for quote in ('"', "'"):
        escaped = escaped.replace(quote, '\\' + quote)
    return escaped
276
277
def qw(words, flat=0, sep=None, maxsplit=-1):
    """Similar to Perl's qw() operator, but with some more options.

    qw(words,flat=0,sep=' ',maxsplit=-1) -> words.split(sep,maxsplit)

    words can also be a list itself, and with flat=1, the output will be
    recursively flattened.  `sep` and `maxsplit` are applied to every string
    found, however deeply it is nested.

    Parameters
    ----------
    words : str or (nested) list of str
        The text to split.
    flat : bool (default: 0)
        For list input, return one flat list instead of a nested one.
    sep : str or None (default: None)
        Separator handed to ``str.split``.
    maxsplit : int (default: -1)
        Maximum number of splits, handed to ``str.split``.

    Returns
    -------
    list
        The stripped, non-blank words; for list input a list with one entry
        per input item (or a flat list of words when `flat` is true).

    Examples:

    >>> qw('1 2')
    ['1', '2']

    >>> qw(['a b','1 2',['m n','p q']])
    [['a', 'b'], ['1', '2'], [['m', 'n'], ['p', 'q']]]

    >>> qw(['a b','1 2',['m n','p q']],flat=1)
    ['a', 'b', '1', '2', 'm', 'n', 'p', 'q']
    """
    # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
    # Python 3; `basestring` does not exist on Python 3.
    if isinstance(words, (str, type(u''))):
        return [word.strip() for word in words.split(sep, maxsplit)
                if word and not word.isspace()]

    if flat:
        # Every recursive call already returns a flat list of words, so
        # concatenating the results is all the flattening required.
        flat_words = []
        for item in words:
            flat_words.extend(qw(item, 1, sep, maxsplit))
        return flat_words

    # Build a real list (map() is lazy on Python 3) and forward the split
    # options, which were previously lost for nested items.
    return [qw(item, 0, sep, maxsplit) for item in words]
304
305
def qwflat(words, sep=None, maxsplit=-1):
    """Shorthand for calling qw() on `words` with flat mode switched on."""
    return qw(words, flat=1, sep=sep, maxsplit=maxsplit)
309
310
def qw_lol(indata):
    """qw_lol('a b') -> [['a','b']],
    otherwise it's just a call to qw().

    A plain string is split with qw() and wrapped in an outer list, so the
    result is a list of lists just as it is for list input.  (The original
    note says this is needed so that the modules_some keys *always* end up
    as a list of lists.)"""
    words = qw(indata)
    # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
    # Python 3; `basestring` does not exist on Python 3.
    if isinstance(indata, (str, type(u''))):
        return [words]
    return words
322
323
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.  ``None`` is passed through as ``None``.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces.

    """
    if instr is None:
        return
    ind = '\t' * ntabs + ' ' * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # '^' also matches after a trailing newline, which leaves a dangling
    # indent at the very end of the output; drop it.
    #  * Guard against an empty indent: outstr[:-0] would be the empty
    #    string and throw the whole text away.
    #  * Test for '\n' rather than os.linesep: in-memory text uses '\n' on
    #    every platform, and a '\r\n' ending still ends with '\n' + ind.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
361
297
362
298
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A single string is wrapped in a one-element list; anything else is
    returned unchanged.

    :Examples:

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
    # Python 3; `basestring` does not exist on Python 3.
    if isinstance(arg, (str, type(u''))):
        return [arg]
    return arg
381
317
382
318
def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    The text is surrounded by one space and an equal number of `mark`
    repetitions on each side.  With empty text the result is simply `mark`
    repeated and cut to `width` characters.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Room left once the text and its two padding spaces are placed, counted
    # in whole marks and halved for each side; never negative.
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return '%s %s %s' % (border, txt, border)
404
340
405
341
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the length of the leading whitespace run of `strng`.

    Any whitespace matched by the regex class ``\\s`` counts (not only the
    space character); 0 is returned when the string does not start with
    whitespace."""
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
416
352
417
353
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently a backslash that
    ends a line (the paragraph-continue marker) is dropped."""
    # Paragraph continue: a backslash right before a line end.
    return re.compile(r'\\$', re.MULTILINE).sub('', strng)
426
362
427
363
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    Text that starts with a blank line, or consists of a single line, is
    handed to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    # A leading blank line means the first line is not special.
    if text.startswith('\n'):
        return textwrap.dedent(text)

    head, newline, tail = text.partition('\n')
    if not newline:
        # only one line
        return textwrap.dedent(text)

    # Keep the first line as is and dedent everything after it.
    return '\n'.join([head, textwrap.dedent(tail)])
453
389
454
390
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.  The text is dedented and
    stripped first.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)

    pieces = paragraph_re.split(dedent(text).strip())

    wrapped = []
    # The pattern has one capturing group, so split() interleaves the
    # captured separators with the paragraphs: paragraphs sit at even indexes.
    for para in pieces[::2]:
        # Indentation that survives dedent is presumed to be meaningful
        # formatting, so only flush paragraphs are re-filled.
        if indent_re.search(para) is None:
            para = textwrap.fill(para, ncols)
        wrapped.append(para)
    return wrapped
479
415
480
416
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Candidates are taken from the first string; among equally long
    candidates the one starting earliest in the first string wins.  A
    one-element list yields that element, an empty list yields ''.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]

    best = ''
    if len(data) > 1 and len(data[0]) > 0:
        first = data[0]
        for start in range(len(first)):
            for size in range(len(first) - start + 1):
                # Only strictly longer candidates can replace the best one.
                if size <= len(best):
                    continue
                candidate = first[start:start + size]
                if all(candidate in other for other in data):
                    best = candidate
    return best
495
431
496
432
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Note that the result is re-joined with '\\n', so a trailing newline of the
    input is not preserved when a prefix is stripped.

    Parameters
    ----------
    text : str

    Returns
    -------
    str
        The text with the quote prefix common to all lines removed, or the
        text unchanged if some line has no quote prefix or the prefixes share
        no common start.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefixes = set()
    for line in lines:
        found = re.match(r'^(\s*>[ >]*)', line)
        if found is None:
            # One unquoted line is enough to leave the whole text alone.
            return text
        prefixes.add(found.group(1))

    # What may be removed is the *leading* part shared by every quote prefix.
    # A longest common substring is not good enough: it can sit in the middle
    # of some prefixes (e.g. '> ' inside both '>> ' and ' > '), and slicing
    # its length off the front of each line would then mangle the text.
    common = os.path.commonprefix(list(prefixes))
    if common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
543
479
544
480
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression, with the
    keyword arguments of the format call as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------

    In [1]: f = EvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out [2]: '2'

    In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
    Out [3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field `name` in the `kwargs` namespace.

        Positional `args` are not consulted.  Returns the evaluated object
        and the field name as a tuple.
        """
        # NOTE(review): eval() runs arbitrary expressions, so templates must
        # come from a trusted source.
        return eval(name, kwargs), name
568
504
569
505
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------

    In [1]: f = FullEvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('{list(range(5))[2:4]}')
    Out[3]: u'[2, 3]'

    In [4]: f.format('{3*2}')
    Out[4]: u'6'
    """
    def vformat(self, format_string, args, kwargs):
        """Format `format_string`, returning the result as one string.

        This mirrors the classic ``Formatter.vformat``.  It is overridden
        because newer Python 3 versions of the base-class method expect
        ``_vformat`` to return a ``(result, auto_arg_index)`` pair, whereas
        the ``_vformat`` below returns a plain string.
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Evaluate every field of `format_string` and join the pieces.

        Raises ValueError when `recursion_depth` is negative.  `args` and
        `used_args` are accepted for signature compatibility but not used.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split the field at its first ':', so glue
                    # both halves back together before evaluating.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # NOTE(review): eval() runs arbitrary expressions, so
                # templates must come from a trusted source.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                # (always with an empty format spec, see above)
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)
626
562
627
563
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the substitution: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    In [1]: f = DollarFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('23 * 76 is $result', result=23*76)
    Out[3]: u'23 * 76 is 1748'

    In [4]: f.format('$a or {b}', a=1, b=2)
    Out[4]: u'1 or 2'
    """
    # group 1: literal text leading up to a '$'
    # group 2: the dotted name after it, with a leading '$' kept when the
    #          dollar sign was doubled
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")

    def parse(self, fmt_string):
        """Yield ``(literal, field_name, format_spec, conversion)`` tuples.

        Works like ``Formatter.parse`` but additionally splits every literal
        chunk on ``$name`` occurrences, yielding each one as a field with an
        empty format spec and no conversion.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                # '.' never matches a newline, so a match can begin after
                # continue_from; keep whatever text was stepped over instead
                # of silently dropping it.
                txt += literal_txt[continue_from:m.start()]
                new_txt, new_field = m.group(1, 2)
                if new_field.startswith("$"):
                    # $$foo --> $foo
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
666
602
667 #-----------------------------------------------------------------------------
603 #-----------------------------------------------------------------------------
668 # Utils to columnize a list of strings
604 # Utils to columnize a list of strings
669 #-----------------------------------------------------------------------------
605 #-----------------------------------------------------------------------------
670
606
def _chunks(l, n):
    """Yield successive n-sized chunks from l.

    The final chunk is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    # range() rather than xrange() so this runs unchanged on Python 2 and 3.
    for i in range(0, len(l), n):
        yield l[i:i+n]


def _find_optimal(rlist, separator_size=2, displaywidth=80):
    """Calculate optimal info to columnize a list of string

    Parameters
    ----------
    rlist : iterable of int
        Length of each item, in display order (items fill columns top to
        bottom).
    separator_size : int (default=2)
        Number of characters placed between two columns.
    displaywidth : int (default=80)
        Total width available.

    Returns
    -------
    dict with the keys ``columns_numbers``, ``rows_numbers``,
    ``columns_width`` (list with the width of each column) and
    ``optimal_separator_width`` (an integer; 0 when there are fewer than two
    columns).  The layout returned is the one with the fewest rows that fits
    in ``displaywidth``; when nothing fits, a single column is used.  An
    empty ``rlist`` gives zero rows and zero columns.
    """
    # Materialise once: callers may hand in a lazy iterator (e.g. map()).
    rlist = list(rlist)
    if not rlist:
        return {'columns_numbers': 0,
                'optimal_separator_width': 0,
                'rows_numbers': 0,
                'columns_width': []}

    for nrow in range(1, len(rlist) + 1):
        # Each chunk of nrow lengths is one column; its width is its longest item.
        chk = [max(column) for column in _chunks(rlist, nrow)]
        sumlength = sum(chk)
        ncols = len(chk)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    # Floor division keeps the width an integer on both Python 2 and 3.
    if ncols > 1:
        optimal_separator_width = (displaywidth - sumlength) // (ncols - 1)
    else:
        optimal_separator_width = 0

    return {'columns_numbers': ncols,
            'optimal_separator_width': optimal_separator_width,
            'rows_numbers': nrow,
            'columns_width': chk}
690
626
691
627
def _get_or_default(mylist, i, default=None):
    """Return ``mylist[i]``, or ``default`` when ``i`` is past the end of the list."""
    return mylist[i] if i < len(mylist) else default
698
634
699
635
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items :
        list of strings to columnize
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Any extra positional or keyword arguments are passed straight through to
    ``_find_optimal``.

    Returns
    -------

    Returns a tuple of (strings_matrix, dict_info)

    strings_matrix :

        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product of
        rows*columns, the last elements of some lists are filled with `empty`
        (``None`` by default).

    dict_info :
        some info to make columnize easier:

        columns_numbers : number of columns
        rows_numbers : number of rows
        columns_width : list with the width of each column
        optimal_separator_width : best separator width between columns

    Examples
    --------

    In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
       ...: compute_item_matrix(l,displaywidth=12)
    Out[1]:
        ([['aaa', 'f', 'k'],
          ['b', 'g', 'l'],
          ['cc', 'h', None],
          ['d', 'i', None],
          ['eeeee', 'j', None]],
         {'columns_numbers': 3,
          'columns_width': [5, 1, 1],
          'optimal_separator_width': 2,
          'rows_numbers': 5})

    """
    # Build a real list of lengths: on Python 3 map() is lazy and has no len().
    lengths = [len(item) for item in items]
    info = _find_optimal(lengths, *args, **kwargs)
    nrow, ncol = info['rows_numbers'], info['columns_numbers']

    # Items run down the columns, so the cell at (row, col) is item
    # number col*nrow + row; positions past the end are padded with `empty`.
    matrix = [[_get_or_default(items, col * nrow + row, default=empty)
               for col in range(ncol)]
              for row in range(nrow)]
    return (matrix, info)
756
692
757
693
def columnize(items, separator='  ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string: one line per row, every cell left-justified to its
    column's width, with a trailing newline.  An empty `items` gives a
    single newline.
    """
    if not items:
        return '\n'

    matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['columns_width']

    lines = []
    for row in matrix:
        # Drop only the None padding of short rows.  Testing truthiness here
        # would also remove genuine empty-string items and shift the cells
        # after them into the wrong column.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join([cell.ljust(width, ' ') for cell, width in zip(cells, widths)]))
    return '\n'.join(lines) + '\n'
General Comments 0
You need to be logged in to leave comments. Login now