##// END OF EJS Templates
Remove unused *grep functions
Thomas Kluyver -
Show More
@@ -1,823 +1,781 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2008-2011 The IPython Development Team
12 # Copyright (C) 2008-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21
21
22 import __main__
22 import __main__
23
23
24 import os
24 import os
25 import re
25 import re
26 import sys
26 import sys
27 import textwrap
27 import textwrap
28 from string import Formatter
28 from string import Formatter
29
29
30 from IPython.external.path import path
30 from IPython.external.path import path
31 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
31 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
32 from IPython.utils import py3compat
32 from IPython.utils import py3compat
33 from IPython.utils.data import flatten
33 from IPython.utils.data import flatten
34
34
35 #-----------------------------------------------------------------------------
35 #-----------------------------------------------------------------------------
36 # Code
36 # Code
37 #-----------------------------------------------------------------------------
37 #-----------------------------------------------------------------------------
38
38
def unquote_ends(istr):
    """Remove a single pair of quotes from the endpoints of a string.

    Parameters
    ----------
    istr : str
        The string to unquote.  Falsy input (e.g. the empty string) is
        returned unchanged.

    Returns
    -------
    str
        `istr` without its first and last characters when both are the same
        quote character (``'`` or ``"``); otherwise `istr` itself.
    """
    # A pair needs two characters: a lone quote character is not a pair and
    # must not be stripped down to the empty string.
    if not istr or len(istr) < 2:
        return istr
    if istr[0] == istr[-1] and istr[0] in ("'", '"'):
        return istr[1:-1]
    return istr
49
49
50
50
class LSString(str):
    """String derivative with special access attributes.

    These are normal strings, but with the special attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects

    Any values which require transformations are computed only once and
    cached.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on newlines (cached after first use)."""
        try:
            return self.__list
        except AttributeError:
            self.__list = self.split('\n')
            return self.__list

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines replaced by spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = self.replace('\n', ' ')
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines that name existing paths.

        Lines for which ``os.path.exists`` is false at the time of the first
        access are left out; the result is cached afterwards.
        """
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self.split('\n')
                            if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)
98
98
99 # FIXME: We need to reimplement type specific displayhook and then add this
99 # FIXME: We need to reimplement type specific displayhook and then add this
100 # back as a custom printer. This should also be moved outside utils into the
100 # back as a custom printer. This should also be moved outside utils into the
101 # core.
101 # core.
102
102
103 # def print_lsstring(arg):
103 # def print_lsstring(arg):
104 # """ Prettier (non-repr-like) and more informative printer for LSString """
104 # """ Prettier (non-repr-like) and more informative printer for LSString """
105 # print "LSString (.p, .n, .l, .s available). Value:"
105 # print "LSString (.p, .n, .l, .s available). Value:"
106 # print arg
106 # print arg
107 #
107 #
108 #
108 #
109 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
109 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
110
110
111
111
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

        .l (or .list) : value as list (the list itself).
        .n (or .nlstr): value as a string, joined on newlines.
        .s (or .spstr): value as a string, joined on spaces.
        .p (or .paths): list of path objects

    Any values which require transformations are computed only once and
    cached.  NOTE: the cache is never invalidated, so mutating the list after
    reading ``.s``, ``.n`` or ``.p`` leaves those attributes stale."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that name existing paths.

        Items for which ``os.path.exists`` is false at the time of the first
        access are left out; the result is cached afterwards.
        """
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """Return all strings matching 'pattern' (a regex or callable).

        A string pattern is searched case-insensitively; a callable is used
        as the predicate directly.  If prune is true, return all items NOT
        matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.  Items that lack that field are matched
        against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """
        # ``basestring`` only exists on Python 2; fall back to ``str``.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        def match_target(s):
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, string_types):
            pred = lambda x: re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern

        if prune:
            return SList([el for el in self if not pred(match_target(el))])
        return SList([el for el in self if pred(match_target(el))])

    def fields(self, *fields):
        """Collect whitespace-separated fields from string list.

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        a.fields(0) is ['-rwxrwxrwx', 'drwxrwxrwx+']
        a.fields(1,0) is ['1 -rwxrwxrwx', '6 drwxrwxrwx+']
        (note the joining by space).
        a.fields(-1) is ['ChangeLog', 'IPython']

        IndexErrors are ignored, and lines that yield no field at all are
        left out of the result.

        Without args, fields() just split()'s the strings (and returns a
        plain list of lists rather than an SList).
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for parts in [line.split() for line in self]:
            lineparts = []
            for fd in fields:
                try:
                    lineparts.append(parts[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))
        return res

    def sort(self, field=None, nums=False):
        """Return a new SList sorted by the specified field (see fields()).

        Unlike ``list.sort`` this does not sort in place.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3).
        Keys from which no integer can be built sort as 0; ties are broken
        by comparing the full lines.
        """
        def sort_key(line):
            if field is None:
                key = line
            else:
                key = SList([line]).fields(field)
            if nums:
                numstr = "".join([ch for ch in key if ch.isdigit()])
                try:
                    key = int(numstr)
                except ValueError:
                    key = 0
            return key

        # decorate, sort, undecorate
        decorated = [[sort_key(line), line] for line in self]
        decorated.sort()
        return SList([pair[1] for pair in decorated])
254
254
255
255
256 # FIXME: We need to reimplement type specific displayhook and then add this
256 # FIXME: We need to reimplement type specific displayhook and then add this
257 # back as a custom printer. This should also be moved outside utils into the
257 # back as a custom printer. This should also be moved outside utils into the
258 # core.
258 # core.
259
259
260 # def print_slist(arg):
260 # def print_slist(arg):
261 # """ Prettier (non-repr-like) and more informative printer for SList """
261 # """ Prettier (non-repr-like) and more informative printer for SList """
262 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
262 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
263 # if hasattr(arg, 'hideonce') and arg.hideonce:
263 # if hasattr(arg, 'hideonce') and arg.hideonce:
264 # arg.hideonce = False
264 # arg.hideonce = False
265 # return
265 # return
266 #
266 #
267 # nlprint(arg) # This was a nested list printer, now removed.
267 # nlprint(arg) # This was a nested list printer, now removed.
268 #
268 #
269 # print_slist = result_display.when_type(SList)(print_slist)
269 # print_slist = result_display.when_type(SList)(print_slist)
270
270
271
271
def esc_quotes(strng):
    """Return the input string with single and double quotes escaped out.

    Every ``"`` and ``'`` in `strng` gets a backslash put in front of it.
    """
    escaped = strng.replace('"', '\\"')
    return escaped.replace("'", "\\'")
276
276
277
277
def qw(words, flat=0, sep=None, maxsplit=-1):
    """Similar to Perl's qw() operator, but with some more options.

    qw(words,flat=0,sep=' ',maxsplit=-1) -> words.split(sep,maxsplit)

    words can also be a list itself, and with flat=1, the output will be
    recursively flattened.  `sep` and `maxsplit` only apply when `words` is
    a string; nested items are split with the defaults.

    Examples:

    >>> qw('1 2')
    ['1', '2']

    >>> qw(['a b','1 2',['m n','p q']])
    [['a', 'b'], ['1', '2'], [['m', 'n'], ['p', 'q']]]

    >>> qw(['a b','1 2',['m n','p q']],flat=1)
    ['a', 'b', '1', '2', 'm', 'n', 'p', 'q']
    """
    # ``basestring`` only exists on Python 2; fall back to ``str``.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(words, string_types):
        return [word.strip() for word in words.split(sep, maxsplit)
                if word and not word.isspace()]
    # Build real lists: on Python 3 ``map`` would hand back a lazy iterator
    # instead of the list the examples above promise.
    if flat:
        return flatten([qw(item, 1) for item in words])
    return [qw(item) for item in words]
304
304
305
305
def qwflat(words, sep=None, maxsplit=-1):
    """Call qw(words) in flat mode.  It's just a convenient shorthand."""
    return qw(words, 1, sep, maxsplit)
309
309
310
310
def qw_lol(indata):
    """qw_lol('a b') -> [['a','b']],
    otherwise it's just a call to qw().

    We need this to make sure the modules_some keys *always* end up as a
    list of lists."""
    # ``basestring`` only exists on Python 2; fall back to ``str``.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(indata, string_types):
        # A bare string becomes a one-element list of word lists.
        return [qw(indata)]
    return qw(indata)
322
322
323
323
def grep(pat, list, case=1):
    """Simple minded grep-like function.

    grep(pat,list) returns occurrences of pat in list, None on failure.

    It only does simple string matching, with no support for regexps.  Use the
    option case=0 for case-insensitive matching.

    The matching items are collected into a new list; an empty result is
    reported as None rather than as an empty list.
    """
    # NOTE: the parameter name ``list`` shadows the builtin; it is kept so
    # that existing keyword callers keep working.
    if case:
        out = [term for term in list if pat in term]
    else:
        lpat = pat.lower()
        out = [term for term in list if lpat in term.lower()]
    return out or None
344
345
def dgrep(pat, *opts):
    """Return grep() on dir()+dir(__builtins__).

    A very common use of grep() when working interactively.  The names
    searched are those of the ``__main__`` module and of its
    ``__builtins__``; any extra positional `opts` are handed on to grep().
    """
    names = dir(__main__) + dir(__main__.__builtins__)
    return grep(pat, names, *opts)
352
353
def idgrep(pat):
    """Case-insensitive dgrep()."""
    # The 0 is forwarded by dgrep() as grep()'s ``case`` argument.
    return dgrep(pat, 0)
358
359
def igrep(pat, list):
    """Synonym for case-insensitive grep."""
    # NOTE: ``list`` shadows the builtin; the name is kept for keyword callers.
    return grep(pat, list, case=0)
364
365
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when `instr`
    is None.

    """
    if instr is None:
        return None
    ind = '\t' * ntabs + ' ' * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a trailing newline, which
    # leaves a dangling indent at the very end; drop it.  The ``ind`` guard
    # matters: with an empty indent ``outstr[:-0]`` would be the empty string.
    if ind and outstr.endswith(os.linesep + ind):
        return outstr[:-len(ind)]
    return outstr
403
361
404
362
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A string is wrapped in a one-element list; anything else is returned
    as is.

    :Examples:

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    # ``basestring`` only exists on Python 2; fall back to ``str``.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(arg, string_types):
        return [arg]
    return arg
423
381
424
382
def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is `mark` repeated and cut to `width`
    characters.  Otherwise `txt` gets one space and the same run of marks on
    each side; if `txt` leaves no room, no marks are added.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Two columns are taken by the spaces around txt; the remainder is split
    # evenly between both sides, counted in whole repetitions of ``mark``.
    nmark = max((width - len(txt) - 2) // len(mark) // 2, 0)
    marks = mark * nmark
    return '%s %s %s' % (marks, txt, marks)
446
404
447
405
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of initial spaces in a string.

    Any leading whitespace character counts (the pattern is ``\\s+``), so a
    tab is counted as one.  Returns 0 when the string does not start with
    whitespace.
    """
    ini_spaces = ini_spaces_re.match(strng)
    if ini_spaces is None:
        return 0
    return ini_spaces.end()
458
416
459
417
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently a backslash that
    ends a line (paragraph continuation) is deleted.
    """
    # Paragraph continue
    par_re = re.compile(r'\\$', re.MULTILINE)
    return par_re.sub('', strng)
468
426
469
427
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    if text.startswith('\n'):
        # text starts with blank line, don't ignore the first line
        return textwrap.dedent(text)

    # split off the first line
    first, newline, rest = text.partition('\n')
    if not newline:
        # only one line
        return textwrap.dedent(text)

    # dedent everything but the first line
    return '\n'.join([first, textwrap.dedent(rest)])
495
453
496
454
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Parameters
    ----------

    text : str
        The text to wrap; it is dedented and stripped first.
    ncols : int (default: 80)
        The width handed to textwrap.fill.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    text = dedent(text).strip()
    # The pattern has a capturing group, so split() interleaves the captured
    # separators with the paragraphs: every other entry is space.
    paragraphs = paragraph_re.split(text)[::2]
    out_ps = []
    for p in paragraphs:
        # presume indentation that survives dedent is meaningful formatting,
        # so don't fill unless text is flush.
        if indent_re.search(p) is None:
            # wrap paragraph
            p = textwrap.fill(p, ncols)
        out_ps.append(p)
    return out_ps
521
479
522
480
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Candidates are taken from the first string and tried in order of start
    position, so among equally long common substrings the first one found in
    ``data[0]`` is returned.  An empty list gives '', and a single-item list
    gives that item.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        first = data[0]
        for start in range(len(first)):
            for length in range(len(first) - start + 1):
                candidate = first[start:start + length]
                # Only strictly longer candidates can replace the current best.
                if length > len(substr) and all(candidate in x for x in data):
                    substr = candidate
    elif len(data) == 1:
        substr = data[0]
    return substr
537
495
538
496
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str
        `text` with the common quotation prefix removed from every line.
        When something is stripped the lines are re-joined with '\\n'; when
        nothing is stripped `text` is returned unchanged.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix is None:
            # One unquoted line is enough: nothing is common to all lines.
            return text
        matches.add(prefix.group(1))

    # What may be cut from every line is the common *prefix* of the quote
    # markers.  A longest common substring can sit in the middle of a marker
    # (e.g. '> ' inside '>> '), and cutting that many characters would remove
    # text that is not shared by all lines.
    common = os.path.commonprefix(list(matches))
    if common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
585
543
586
544
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------

    In [1]: f = EvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out [2]: '2'

    In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
    Out [3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field `name` as an expression.

        The keyword arguments given to format() serve as the namespace of
        the evaluation; positional `args` are not consulted.  Returns the
        ``(value, name)`` pair that Formatter expects from get_field.
        """
        # SECURITY: eval() runs arbitrary code.  Only use this formatter on
        # trusted templates, never on format strings from untrusted sources.
        v = eval(name, kwargs)
        return v, name
610
568
611
569
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as an expression in the kwargs
    namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------

    In [1]: f = FullEvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('{list(range(5))[2:4]}')
    Out[3]: u'[2, 3]'

    In [4]: f.format('{3*2}')
    Out[4]: u'6'
    """
    # Adapted from Formatter._vformat with minor changes to allow eval and to
    # replace the format_spec code with slicing.  The public ``vformat`` is
    # overridden (rather than the private ``_vformat``) because newer Python
    # versions call ``_vformat`` with an extra argument and expect it to
    # return a ``(result, auto_arg_index)`` tuple.
    def vformat(self, format_string, args, kwargs):
        """Return `format_string` with each field replaced by its evaluation.

        Positional `args` are accepted for interface compatibility only;
        fields are evaluated with `kwargs` as the global namespace.
        """
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                if format_spec:
                    # The parser split the field at ':'; glue it back so
                    # that slices such as ``x[1:2]`` reach eval intact.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object (always with an empty format spec)
                result.append(self.format_field(obj, ''))

        # Same hook the base ``vformat`` invokes after formatting; no
        # argument is ever recorded as used by this formatter.
        self.check_unused_args(set(), args, kwargs)
        return u''.join(py3compat.cast_unicode(s) for s in result)

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Backward-compatible entry point with the original signature.

        Raises ValueError when `recursion_depth` is negative, otherwise
        returns the formatted string.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        return self.vformat(format_string, args, kwargs)
668
626
669
627
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    ``$$foo`` is an escape and is emitted as the literal text ``$foo``.

    Examples
    --------
    In [1]: f = DollarFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('23 * 76 is $result', result=23*76)
    Out[3]: u'23 * 76 is 1748'

    In [4]: f.format('$a or {b}', a=1, b=2)
    Out[4]: u'1 or 2'
    """
    # Raw string: ``\$``, ``\w`` and ``\.`` are regex escapes, not string
    # escapes.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")
    def parse(self, fmt_string):
        """Yield ``(literal, field_name, format_spec, conversion)`` tuples.

        Wraps ``Formatter.parse`` and additionally splits every ``$name``
        found in the literal text into its own field.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # '.' does not match a newline, so a match may begin after
                # the point where the previous one ended; keep the text in
                # between instead of dropping it.
                new_txt = literal_txt[continue_from:m.start()] + new_txt
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
708
666
709 #-----------------------------------------------------------------------------
667 #-----------------------------------------------------------------------------
710 # Utils to columnize a list of strings
668 # Utils to columnize a list of strings
711 #-----------------------------------------------------------------------------
669 #-----------------------------------------------------------------------------
712
670
713 def _chunks(l, n):
671 def _chunks(l, n):
714 """Yield successive n-sized chunks from l."""
672 """Yield successive n-sized chunks from l."""
715 for i in xrange(0, len(l), n):
673 for i in xrange(0, len(l), n):
716 yield l[i:i+n]
674 yield l[i:i+n]
717
675
718
676
719 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
677 def _find_optimal(rlist , separator_size=2 , displaywidth=80):
720 """Calculate optimal info to columnize a list of string"""
678 """Calculate optimal info to columnize a list of string"""
721 for nrow in range(1, len(rlist)+1) :
679 for nrow in range(1, len(rlist)+1) :
722 chk = map(max,_chunks(rlist, nrow))
680 chk = map(max,_chunks(rlist, nrow))
723 sumlength = sum(chk)
681 sumlength = sum(chk)
724 ncols = len(chk)
682 ncols = len(chk)
725 if sumlength+separator_size*(ncols-1) <= displaywidth :
683 if sumlength+separator_size*(ncols-1) <= displaywidth :
726 break;
684 break;
727 return {'columns_numbers' : ncols,
685 return {'columns_numbers' : ncols,
728 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
686 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
729 'rows_numbers' : nrow,
687 'rows_numbers' : nrow,
730 'columns_width' : chk
688 'columns_width' : chk
731 }
689 }
732
690
733
691
734 def _get_or_default(mylist, i, default=None):
692 def _get_or_default(mylist, i, default=None):
735 """return list item number, or default if don't exist"""
693 """return list item number, or default if don't exist"""
736 if i >= len(mylist):
694 if i >= len(mylist):
737 return default
695 return default
738 else :
696 else :
739 return mylist[i]
697 return mylist[i]
740
698
741
699
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items :
        list of strings to columnize
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------

    Returns a tuple of (strings_matrix, dict_info)

    strings_matrix :

        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product
        of rows*columns, the last elements of some lists are filled with
        `empty`.

    dict_info :
        some info to make columnize easier:

        columns_numbers : number of columns
        rows_numbers : number of rows
        columns_width : list of the width of each column
        optimal_separator_width : best separator width between columns

    Examples
    --------

    In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
       ...: compute_item_matrix(l,displaywidth=12)
    Out[1]:
        ([['aaa', 'f', 'k'],
          ['b', 'g', 'l'],
          ['cc', 'h', None],
          ['d', 'i', None],
          ['eeeee', 'j', None]],
         {'columns_numbers': 3,
          'columns_width': [5, 1, 1],
          'optimal_separator_width': 2,
          'rows_numbers': 5})

    """
    # A real list (not a lazy map object) so the layout helper can take its
    # length on Python 3 as well.
    widths = [len(item) for item in items]
    info = _find_optimal(widths, *args, **kwargs)
    nrow, ncol = info['rows_numbers'], info['columns_numbers']
    # Items run down the columns: cell (row i, column c) is item c*nrow + i.
    matrix = [[_get_or_default(items, c*nrow+i, default=empty) for c in range(ncol)]
              for i in range(nrow)]
    return (matrix, info)
798
756
799
757
def columnize(items, separator='  ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string, terminated by a newline.  An empty `items` gives
    a single newline.
    """
    if not items :
        return '\n'
    matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['columns_width']

    def join_row(row):
        # Falsy cells (the None padding of short columns) are left out;
        # every remaining cell is padded to the width of its column.
        cells = [cell for cell in row if cell]
        return separator.join([cell.ljust(w, ' ') for cell, w in zip(cells, widths)])

    return '\n'.join([join_row(row) for row in matrix])+'\n'
General Comments 0
You need to be logged in to leave comments. Login now