##// END OF EJS Templates
Merge pull request #2261 from bfroehle/empty_paste_fix...
Thomas Kluyver -
r8148:e431d6ba merge
parent child Browse files
Show More
@@ -1,170 +1,173
1 1 # encoding: utf-8
2 2 """Tests for IPython.utils.text"""
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2011 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15 import os
16 16 import math
17 17 import random
18 18
19 19 import nose.tools as nt
20 20
21 21 from nose import with_setup
22 22
23 23 from IPython.testing import decorators as dec
24 24 from IPython.utils import text
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Globals
28 28 #-----------------------------------------------------------------------------
29 29
def test_columnize():
    """Check columnize output for three different display widths."""
    width = 5
    words = [letter * width for letter in 'abc']
    # (displaywidth, expected output) pairs, from widest to narrowest display.
    expectations = [
        (80, 'aaaaa bbbbb ccccc\n'),
        (12, 'aaaaa ccccc\nbbbbb\n'),
        (10, 'aaaaa\nbbbbb\nccccc\n'),
    ]
    for displaywidth, expected in expectations:
        result = text.columnize(words, displaywidth=displaywidth)
        nt.assert_equal(result, expected)
40 40
def test_columnize_random():
    """Test with random input to hopefully catch edge cases.

    Fails with an AssertionError carrying the offending parameters whenever
    columnize produces a line wider than the requested display width.
    """
    for nitems in [random.randint(2, 70) for _ in range(2, 20)]:
        displaywidth = random.randint(20, 200)
        rand_len = [random.randint(2, displaywidth) for _ in range(nitems)]
        items = ['x' * l for l in rand_len]
        out = text.columnize(items, displaywidth=displaywidth)
        longer_line = max([len(x) for x in out.split('\n')])
        longer_element = max(rand_len)
        if longer_line > displaywidth:
            # Raise explicitly instead of `assert False`, which is stripped
            # under -O; the diagnostics travel with the exception.
            report = "\n".join([
                "Columnize displayed something lager than displaywidth : %s " % longer_line,
                "longer element : %s " % longer_element,
                "displaywidth : %s " % displaywidth,
                "number of element : %s " % nitems,
                "size of each element :\n %s" % rand_len,
            ])
            raise AssertionError(report)
57 57
def test_columnize_medium():
    """Test with inputs that shouldn't be wider than 80."""
    width = 40
    words = [letter * width for letter in 'abc']
    result = text.columnize(words, displaywidth=80)
    # One item per line, with a trailing newline.
    expected = '\n'.join(words + [''])
    nt.assert_equal(result, expected)
64 64
def test_columnize_long():
    """Test columnize with inputs longer than the display window."""
    width = 11
    words = [letter * width for letter in 'abc']
    result = text.columnize(words, displaywidth=width - 1)
    # One item per line, with a trailing newline.
    expected = '\n'.join(words + [''])
    nt.assert_equal(result, expected)
71 71
def eval_formatter_check(f):
    """Run the checks common to every eval-style formatter on `f`.

    Covers arithmetic and method calls inside fields, the !s / !r
    conversions, non-ASCII values, and the NameError raised for unknown names.
    """
    ns = dict(n=12, pi=math.pi, stuff='hello there', os=os, u=u"café", b="café")
    s = f.format("{n} {n//4} {stuff.split()[0]}", **ns)
    nt.assert_equal(s, "12 3 hello")
    s = f.format(' '.join(['{n//%i}'%i for i in range(1,8)]), **ns)
    nt.assert_equal(s, "12 6 4 3 2 2 1")
    s = f.format('{[n//i for i in range(1,8)]}', **ns)
    nt.assert_equal(s, "[12, 6, 4, 3, 2, 2, 1]")
    s = f.format("{stuff!s}", **ns)
    nt.assert_equal(s, ns['stuff'])
    s = f.format("{stuff!r}", **ns)
    nt.assert_equal(s, repr(ns['stuff']))

    # Check with unicode:
    s = f.format("{u}", **ns)
    nt.assert_equal(s, ns['u'])
    # This decodes in a platform dependent manner, but it shouldn't error out
    s = f.format("{b}", **ns)

    nt.assert_raises(NameError, f.format, '{dne}', **ns)
92 92
def eval_formatter_slicing_check(f):
    """Check that `f` evaluates [a:b] slices inside fields.

    A field such as {n:x} is expected to raise SyntaxError on these formatters.
    """
    ns = dict(n=12, pi=math.pi, stuff='hello there', os=os)
    cases = [
        (" {stuff.split()[:]} ", " ['hello', 'there'] "),
        (" {stuff.split()[::-1]} ", " ['there', 'hello'] "),
        ("{stuff[::2]}", ns['stuff'][::2]),
    ]
    for template, expected in cases:
        nt.assert_equal(f.format(template, **ns), expected)

    nt.assert_raises(SyntaxError, f.format, "{n:x}", **ns)
103 103
def eval_formatter_no_slicing_check(f):
    """Check that `f` honours format specs and rejects [:] slice syntax."""
    ns = dict(n=12, pi=math.pi, stuff='hello there', os=os)

    cases = [
        ('{n:x} {pi**2:+f}', "c +9.869604"),
        ('{stuff[slice(1,4)]}', 'ell'),
    ]
    for template, expected in cases:
        nt.assert_equal(f.format(template, **ns), expected)

    nt.assert_raises(SyntaxError, f.format, "{a[:]}")
114 114
def test_eval_formatter():
    """Run the common and the no-slicing checks on EvalFormatter."""
    formatter = text.EvalFormatter()
    for check in (eval_formatter_check, eval_formatter_no_slicing_check):
        check(formatter)
119 119
def test_full_eval_formatter():
    """Run the common and the slicing checks on FullEvalFormatter."""
    formatter = text.FullEvalFormatter()
    for check in (eval_formatter_check, eval_formatter_slicing_check):
        check(formatter)
124 124
def test_dollar_formatter():
    """Check DollarFormatter: shared eval checks plus $name substitution."""
    f = text.DollarFormatter()
    eval_formatter_check(f)
    eval_formatter_slicing_check(f)

    ns = dict(n=12, pi=math.pi, stuff='hello there', os=os)
    # (template, expected) pairs formatted against `ns`.
    cases = [
        ("$n", "12"),
        ("$n.real", "12"),
        ("$n/{stuff[:5]}", "12/hello"),
        ("$n $$HOME", "12 $HOME"),
    ]
    for template, expected in cases:
        nt.assert_equal(f.format(template, **ns), expected)
    nt.assert_equal(f.format("${foo}", foo="HOME"), "$HOME")
141 141
142 142
def test_long_substr():
    """A single-element list yields that element."""
    strings = ['hi']
    nt.assert_equal(text.long_substr(strings), 'hi')
146 146
147 147
def test_long_substr2():
    """Several strings sharing 'ab' yield 'ab'."""
    strings = ['abc', 'abd', 'abf', 'ab']
    nt.assert_equal(text.long_substr(strings), 'ab')
151 151
def test_long_substr_empty():
    """An empty list yields the empty string."""
    strings = []
    nt.assert_equal(text.long_substr(strings), '')
152 155
def test_strip_email():
    """The quote prefix shared by all lines is removed from each line."""
    quoted = """\
>> >>> def f(x):
>> ... return x+1
>> ...
>> >>> zz = f(2.5)"""
    expected = """\
>>> def f(x):
... return x+1
...
>>> zz = f(2.5)"""
    nt.assert_equal(text.strip_email_quotes(quoted), expected)
165 168
166 169
def test_strip_email2():
    """Several levels of quoting on a single line are all removed."""
    quoted = '> > > list()'
    expected = 'list()'
    nt.assert_equal(text.strip_email_quotes(quoted), expected)
@@ -1,845 +1,845
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for working with strings and text.
4 4 """
5 5
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2008-2011 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16
17 17 import __main__
18 18
19 19 import os
20 20 import re
21 21 import shutil
22 22 import sys
23 23 import textwrap
24 24 from string import Formatter
25 25
26 26 from IPython.external.path import path
27 27 from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
28 28 from IPython.utils import py3compat
29 29 from IPython.utils.io import nlprint
30 30 from IPython.utils.data import flatten
31 31
32 32 #-----------------------------------------------------------------------------
33 33 # Code
34 34 #-----------------------------------------------------------------------------
35 35
def unquote_ends(istr):
    """Strip one matching pair of quotes from the two ends of a string.

    The value is returned unchanged when it is empty or when its first and
    last characters are not the same quote character (' or ").
    """
    if istr and istr[0] == istr[-1] and istr[0] in ("'", '"'):
        return istr[1:-1]
    return istr
46 46
47 47
class LSString(str):
    """str subclass exposing alternative views of its value as attributes.

    Instances behave like normal strings and additionally provide:

        .l (or .list) : the value split on newlines, as a list.
        .n (or .nlstr): the string itself.
        .s (or .spstr): the value with newlines replaced by spaces.
        .p (or .paths): path objects for the lines naming existing paths.

    The list, space-separated and path views are computed on first access
    and cached on the instance."""

    def get_list(self):
        """Return the lines of the string, cached after the first call."""
        try:
            cached = self.__list
        except AttributeError:
            cached = self.__list = self.split('\n')
        return cached

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = self.replace('\n',' ')
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            existing = [line for line in self.split('\n') if os.path.exists(line)]
            cached = self.__paths = [path(line) for line in existing]
        return cached

    p = paths = property(get_paths)
95 95
96 96 # FIXME: We need to reimplement type specific displayhook and then add this
97 97 # back as a custom printer. This should also be moved outside utils into the
98 98 # core.
99 99
100 100 # def print_lsstring(arg):
101 101 # """ Prettier (non-repr-like) and more informative printer for LSString """
102 102 # print "LSString (.p, .n, .l, .s available). Value:"
103 103 # print arg
104 104 #
105 105 #
106 106 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
107 107
108 108
class SList(list):
    """list subclass exposing alternative views of its value as attributes.

    Instances behave like normal lists and additionally provide:

        .l (or .list) : the list itself.
        .n (or .nlstr): the items joined with newlines.
        .s (or .spstr): the items joined with spaces.
        .p (or .paths): path objects for the items naming existing paths.

    The joined and path views are computed on first access and cached on
    the instance; the cache is not refreshed if the list is modified later."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = ' '.join(self)
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            cached = self.__nlstr
        except AttributeError:
            cached = self.__nlstr = '\n'.join(self)
        return cached

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            cached = self.__paths = [path(item) for item in self if os.path.exists(item)]
        return cached

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched for case-insensitively; a callable is
        used directly as the predicate. If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field; items lacking that field are tested
        against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def target_of(item):
            # The text the predicate is applied to for one list item.
            if field is None:
                return item
            try:
                return item.split()[field]
            except IndexError:
                return ""

        if isinstance(pattern, basestring):
            predicate = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            predicate = pattern

        # Keep an item when "predicate matched" differs from "prune requested".
        wanted = not prune
        return SList([item for item in self
                      if bool(predicate(target_of(item))) == wanted])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::
            -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
            drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython

        a.fields(0) is ['-rwxrwxrwx', 'drwxrwxrwx+']
        a.fields(1,0) is ['1 -rwxrwxrwx', '6 drwxrwxrwx+']
        (note the joining by space).
        a.fields(-1) is ['ChangeLog', 'IPython']

        Fields missing from a line are skipped, and a line contributing no
        field at all is left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [item.split() for item in self]

        collected = SList()
        for parts in [item.split() for item in self]:
            picked = []
            for index in fields:
                try:
                    picked.append(parts[index])
                except IndexError:
                    pass
            if picked:
                collected.append(" ".join(picked))

        return collected

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Example::
            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        Returns a new SList; a key containing no digits counts as 0 when
        nums is true.
        """

        # decorate, sort, undecorate
        if field is None:
            decorated = [[line, line] for line in self]
        else:
            decorated = [[SList([line]).fields(field), line] for line in self]

        if nums:
            for pair in decorated:
                digits = "".join([ch for ch in pair[0] if ch.isdigit()])
                try:
                    pair[0] = int(digits)
                except ValueError:
                    pair[0] = 0

        decorated.sort()
        return SList([line for _, line in decorated])
251 251
252 252
253 253 # FIXME: We need to reimplement type specific displayhook and then add this
254 254 # back as a custom printer. This should also be moved outside utils into the
255 255 # core.
256 256
257 257 # def print_slist(arg):
258 258 # """ Prettier (non-repr-like) and more informative printer for SList """
259 259 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
260 260 # if hasattr(arg, 'hideonce') and arg.hideonce:
261 261 # arg.hideonce = False
262 262 # return
263 263 #
264 264 # nlprint(arg)
265 265 #
266 266 # print_slist = result_display.when_type(SList)(print_slist)
267 267
268 268
def esc_quotes(strng):
    """Return the input string with every ' and " preceded by a backslash."""
    escaped = strng.replace('"', '\\"')
    escaped = escaped.replace("'", "\\'")
    return escaped
273 273
274 274
def qw(words,flat=0,sep=None,maxsplit=-1):
    """Similar to Perl's qw() operator, but with some more options.

    qw(words,flat=0,sep=' ',maxsplit=-1) -> words.split(sep,maxsplit)

    A string is split on `sep` and its pieces are stripped; empty and
    whitespace-only pieces are dropped. words can also be a list itself,
    in which case each element is processed in turn (with the default
    `sep` and `maxsplit`), and with flat=1 the output will be recursively
    flattened.

    Examples:

    >>> qw('1 2')
    ['1', '2']

    >>> qw(['a b','1 2',['m n','p q']])
    [['a', 'b'], ['1', '2'], [['m', 'n'], ['p', 'q']]]

    >>> qw(['a b','1 2',['m n','p q']],flat=1)
    ['a', 'b', '1', '2', 'm', 'n', 'p', 'q']
    """

    if isinstance(words, basestring):
        pieces = words.split(sep, maxsplit)
        return [piece.strip() for piece in pieces
                if piece and not piece.isspace()]
    if not flat:
        return [qw(element) for element in words]
    # Recurse in flat mode on every element, then flatten the nested result.
    flags = [1] * len(words)
    return flatten([qw(element, flag) for element, flag in zip(words, flags)])
301 301
302 302
def qwflat(words,sep=None,maxsplit=-1):
    """Shorthand for calling qw() with flat=1."""
    flat = 1
    return qw(words, flat, sep, maxsplit)
306 306
307 307
def qw_lol(indata):
    """qw_lol('a b') -> [['a','b']],
    otherwise it's just a call to qw().

    A string argument has its qw() result wrapped in an extra list, so the
    result for the modules_some keys *always* ends up as a list of lists."""

    result = qw(indata)
    if isinstance(indata, basestring):
        result = [result]
    return result
319 319
320 320
def grep(pat,list,case=1):
    """Simple minded grep-like function.

    grep(pat,list) returns the items of list that contain pat, or None
    when nothing matches.

    It only does simple substring matching, with no support for regexps. Use
    the option case=0 for case-insensitive matching."""

    # The result is a new list holding references to the matching items.
    if case:
        hits = [term for term in list if term.find(pat) > -1]
    else:
        needle = pat.lower()
        hits = [term for term in list if term.lower().find(needle) > -1]

    return hits if hits else None
341 341
342 342
def dgrep(pat,*opts):
    """Return grep() on dir(__main__)+dir(__main__.__builtins__).

    Extra positional arguments are passed through to grep(). A very common
    use of grep() when working interactively."""

    names = dir(__main__) + dir(__main__.__builtins__)
    return grep(pat, names, *opts)
349 349
350 350
def idgrep(pat):
    """Case-insensitive dgrep()"""

    case = 0
    return dgrep(pat, case)
355 355
356 356
def igrep(pat,list):
    """Synonym for case-insensitive grep: grep(pat, list, case=0)."""

    matches = grep(pat, list, case=0)
    return matches
361 361
362 362
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
    `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # When the input ends with a newline, the substitution also indents the
    # empty "line" after it; drop that trailing indentation.  '^' only
    # matches after '\n', so test for '\n' rather than os.linesep.  The
    # `ind` guard matters because outstr[:-0] would be the empty string.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
400 400
def native_line_ends(filename,backup=1):
    """Convert (in-place) a file to line-ends native to the current OS.

    The file is rewritten with os.linesep between its lines and always ends
    with one line terminator. A copy of the original is made next to it
    first ('~' suffix on posix, '.bak' otherwise); if rewriting fails, that
    copy is renamed back over the file.

    If the optional backup argument is given as false, no backup of the
    original file is left. """

    backup_suffixes = {'posix':'~','dos':'.bak','nt':'.bak','mac':'.bak'}

    # Platforms missing from the table fall back to '.bak' rather than
    # failing with a KeyError.
    bak_filename = filename + backup_suffixes.get(os.name, '.bak')

    # Work on bytes throughout: the output is written in binary mode, so the
    # input is read the same way and the separator is encoded once.
    with open(filename, 'rb') as source:
        original = source.read()
    eol = os.linesep.encode('ascii')

    shutil.copy2(filename,bak_filename)
    try:
        with open(filename,'wb') as new:
            new.write(eol.join(original.splitlines()))
            new.write(eol) # ALWAYS put an eol at the end of the file
    except EnvironmentError:
        # Best effort: put the untouched copy back in place.
        os.rename(bak_filename,filename)
    if not backup:
        try:
            os.remove(bak_filename)
        except OSError:
            # The backup may already be gone (e.g. renamed back above).
            pass
425 425
426 426
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A string is wrapped in a one-element list; anything else is returned
    unchanged.

    :Examples:

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """

    return [arg] if isinstance(arg, basestring) else arg
445 445
446 446
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    The text is surrounded by a space and an equal number of `mark`
    repetitions on each side. With empty text, the result is `mark`
    repeated and cut to `width` characters.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

    """
    if not txt:
        return (mark*width)[:width]
    # Repetitions of `mark` per side; never negative when the text is too long.
    per_side = max(0, (width-len(txt)-2)//len(mark)//2)
    border = mark*per_side
    return '%s %s %s' % (border, txt, border)
468 468
469 469
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string."""

    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
480 480
481 481
def format_screen(strng):
    """Format a string for screen printing.

    This removes a backslash found at the end of any line (a latex-type
    paragraph-continue code)."""
    # Paragraph continue
    line_end_backslash = re.compile(r'\\$',re.MULTILINE)
    return line_end_backslash.sub('', strng)
490 490
491 491
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    Text that starts with a newline or has only one line is passed to
    textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')
    if not newline or not first:
        # Single line, or the text starts with a blank line: the first line
        # needs no special treatment.
        return textwrap.dedent(text)

    # Keep the first line as is and dedent everything after it.
    return first + '\n' + textwrap.dedent(rest)
517 517
518 518
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    separator_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_re = re.compile(r'\n\s+', re.MULTILINE)

    cleaned = dedent(text).strip()
    # split() also returns the captured separators; every other entry is space
    paragraphs = separator_re.split(cleaned)[::2]

    # presume indentation that survives dedent is meaningful formatting,
    # so only fill paragraphs whose continuation lines are flush.
    return [textwrap.fill(par, ncols) if indented_re.search(par) is None else par
            for par in paragraphs]
543 543
544 544
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Substrings of the first string are tried against every string in
    `data`. An empty list yields '', and a single-element list yields that
    element.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        reference = data[0]
        for start in range(len(reference)):
            for length in range(len(reference) - start + 1):
                candidate = reference[start:start + length]
                # Only a strictly longer candidate can replace the current best.
                if length > len(substr) and all(candidate in x for x in data):
                    substr = candidate
    elif len(data) == 1:
        substr = data[0]
    return substr
559 559
560 560
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    The text with the common quotation prefix removed from every line, or
    the original text when some line has no quotation prefix or the lines
    share none.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefixes = set()
    for line in lines:
        quoted = re.match(r'^(\s*>[ >]*)', line)
        if quoted is None:
            # One unquoted line means nothing is stripped anywhere.
            return text
        prefixes.add(quoted.group(1))

    # Use the common *prefix* of the quotation marks: a common substring
    # found elsewhere in them would strip the wrong characters.
    # commonprefix() returns '' for an empty list (empty input text).
    prefix = os.path.commonprefix(list(prefixes))
    if prefix:
        strip = len(prefix)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
607 607
608 608
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each field name is evaluated as an expression, with the keyword
    arguments of format() as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------

    In [1]: f = EvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out [2]: '2'

    In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
    Out [3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # NOTE(review): eval() executes arbitrary expressions; templates
        # should come from trusted sources only.
        value = eval(name, kwargs)
        return value, name
632 632
633 633
@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every format field is evaluated as an expression in the kwargs
    namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------

    In [1]: f = FullEvalFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('{list(range(5))[2:4]}')
    Out[3]: u'[2, 3]'

    In [4]: f.format('{3*2}')
    Out[4]: u'6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')

        pieces = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                pieces.append(literal_text)

            # nothing more to do when this chunk carries no field
            if field_name is None:
                continue

            # The parser split the field at the first ':'; glue the halves
            # back together so the whole text is evaluated (allows slicing).
            if format_spec:
                field_name = ':'.join([field_name, format_spec])

            # eval the contents of the field, then apply any conversion
            # NOTE(review): eval() executes arbitrary expressions; templates
            # should come from trusted sources only.
            value = eval(field_name, kwargs)
            value = self.convert_field(value, conversion)

            # format the object (with an empty spec) and append to the result
            pieces.append(self.format_field(value, ''))

        return u''.join(py3compat.cast_unicode(s) for s in pieces)
690 690
691 691
@skip_doctest_py3
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign ($$foo) produces a literal $foo.

    Examples
    --------
    In [1]: f = DollarFormatter()
    In [2]: f.format('{n//4}', n=8)
    Out[2]: u'2'

    In [3]: f.format('23 * 76 is $result', result=23*76)
    Out[3]: u'23 * 76 is 1748'

    In [4]: f.format('$a or {b}', a=1, b=2)
    Out[4]: u'1 or 2'
    """
    # Raw string: same pattern, without invalid string escape sequences.
    # Group 1 is the text before a '$', group 2 the (possibly '$'-prefixed)
    # dotted name after it.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")
    def parse(self, fmt_string):
        """Yield (literal, field, format_spec, conversion) tuples like
        Formatter.parse, with extra fields for $name occurrences found in
        the literal text."""
        for literal_txt, field_name, format_spec, conversion \
                    in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1,2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
730 730
731 731 #-----------------------------------------------------------------------------
732 732 # Utils to columnize a list of string
733 733 #-----------------------------------------------------------------------------
734 734
def _chunks(l, n):
    """Yield successive n-sized chunks from l.

    The last chunk is shorter when len(l) is not a multiple of n.
    """
    # range() instead of the Python-2-only xrange(); same chunks either way.
    for i in range(0, len(l), n):
        yield l[i:i+n]
739 739
740 740
def _find_optimal(rlist , separator_size=2 , displaywidth=80):
    """Calculate optimal info to columnize a list of string

    Parameters
    ----------
    rlist : iterable of int
        Length of each item, in display order.
    separator_size : int (default=2)
        Number of characters between two columns.
    displaywidth : int (default=80)
        Width the columns should fit in.

    Returns
    -------
    dict with the keys 'columns_numbers', 'rows_numbers', 'columns_width'
    (list with the width of each column) and 'optimal_separator_width'.
    The smallest number of rows whose columns fit in `displaywidth` is
    used; if none fits, there is one row per item. For an empty `rlist`,
    all counts are 0 and 'columns_width' is empty.
    """
    rlist = list(rlist)
    if not rlist:
        # No loop iteration would happen below; report an empty layout.
        return {'columns_numbers' : 0,
                'optimal_separator_width' : 0,
                'rows_numbers' : 0,
                'columns_width' : []
               }
    for nrow in range(1, len(rlist)+1):
        # Items fill the columns top to bottom, so each run of nrow items is
        # one column, as wide as its longest item.
        chk = [max(rlist[i:i+nrow]) for i in range(0, len(rlist), nrow)]
        sumlength = sum(chk)
        ncols = len(chk)
        if sumlength+separator_size*(ncols-1) <= displaywidth:
            break
    return {'columns_numbers' : ncols,
            # floor division keeps the width an integer under true division
            'optimal_separator_width' : (displaywidth - sumlength)//(ncols-1) if (ncols-1) else 0,
            'rows_numbers' : nrow,
            'columns_width' : chk
           }
754 754
755 755
def _get_or_default(mylist, i, default=None):
    """Return mylist[i], or default when i is past the end of the list."""
    return default if i >= len(mylist) else mylist[i]
762 762
763 763
@skip_doctest
def compute_item_matrix(items, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters :
    ------------

    items :
        list of strings to columnize
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns :
    ---------

    Returns a tuple of (strings_matrix, dict_info)

    strings_matrix :

        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product of
        rows*columns, the last elements of some lists are filled with `empty`.

    dict_info :
        some info to make columnize easier:

        columns_numbers : number of columns
        rows_numbers : number of rows
        columns_width : list of width of each column
        optimal_separator_width : best separator width between columns

    Example :
    ---------

    In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
       ...: compute_item_matrix(l,displaywidth=12)
    Out[1]:
        ([['aaa', 'f', 'k'],
        ['b', 'g', 'l'],
        ['cc', 'h', None],
        ['d', 'i', None],
        ['eeeee', 'j', None]],
        {'columns_numbers': 3,
        'columns_width': [5, 1, 1],
        'optimal_separator_width': 2,
        'rows_numbers': 5})

    """
    lengths = [len(item) for item in items]
    info = _find_optimal(lengths, *args, **kwargs)
    nrow = info['rows_numbers']
    ncol = info['columns_numbers']

    # Items run down the columns: row r, column c holds item number c*nrow+r.
    matrix = []
    for r in range(nrow):
        row = [_get_or_default(items, c*nrow+r, default=empty) for c in range(ncol)]
        matrix.append(row)
    return (matrix, info)
820 820
821 821
def columnize(items, separator=' ', displaywidth=80):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    separator : str, optional
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string, one line per row, ending with a newline. An empty
    `items` gives a single newline.
    """
    if not items :
        return '\n'
    matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['columns_width']

    rendered = []
    for row in matrix:
        # Drop the filler (and any other false) cells before padding.
        cells = [cell for cell in row if cell]
        padded = [cell.ljust(width, ' ') for cell, width in zip(cells, widths)]
        rendered.append(separator.join(padded))
    return '\n'.join(rendered)+'\n'
General Comments 0
You need to be logged in to leave comments. Login now