##// END OF EJS Templates
remove cast_unicode and add some typings
Matthias Bussonnier -
Show More
@@ -1,773 +1,773 b''
1 1 """DEPRECATED: Input handling and transformation machinery.
2 2
3 3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
4 4
5 5 The first class in this module, :class:`InputSplitter`, is designed to tell when
6 6 input from a line-oriented frontend is complete and should be executed, and when
7 7 the user should be prompted for another line of code instead. The name 'input
8 8 splitter' is largely for historical reasons.
9 9
10 10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
11 11 with full support for the extended IPython syntax (magics, system calls, etc).
12 12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
13 13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
14 14 and stores the results.
15 15
16 16 For more details, see the class docstrings below.
17 17 """
18 18
19 19 from warnings import warn
20 20
21 21 warn('IPython.core.inputsplitter is deprecated since IPython 7 in favor of `IPython.core.inputtransformer2`',
22 22 DeprecationWarning)
23 23
24 24 # Copyright (c) IPython Development Team.
25 25 # Distributed under the terms of the Modified BSD License.
26 26 import ast
27 27 import codeop
28 28 import io
29 29 import re
30 30 import sys
31 31 import tokenize
32 32 import warnings
33 33
34 34 from IPython.utils.py3compat import cast_unicode
35 35 from IPython.core.inputtransformer import (leading_indent,
36 36 classic_prompt,
37 37 ipy_prompt,
38 38 cellmagic,
39 39 assemble_logical_lines,
40 40 help_end,
41 41 escaped_commands,
42 42 assign_from_magic,
43 43 assign_from_system,
44 44 assemble_python_lines,
45 45 )
46 46
47 47 # These are available in this module for backwards compatibility.
48 48 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
49 49 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
50 50 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
51 51
52 52 #-----------------------------------------------------------------------------
53 53 # Utilities
54 54 #-----------------------------------------------------------------------------
55 55
56 56 # FIXME: These are general-purpose utilities that later can be moved to the
57 57 # general ward. Kept here for now because we're being very strict about test
58 58 # coverage with this code, and this lets us ensure that we keep 100% coverage
59 59 # while developing.
60 60
61 61 # compiled regexps for autoindent management
62 62 dedent_re = re.compile('|'.join([
63 63 r'^\s+raise(\s.*)?$', # raise statement (+ space + other stuff, maybe)
64 64 r'^\s+raise\([^\)]*\).*$', # wacky raise with immediate open paren
65 65 r'^\s+return(\s.*)?$', # normal return (+ space + other stuff, maybe)
66 66 r'^\s+return\([^\)]*\).*$', # wacky return with immediate open paren
67 67 r'^\s+pass\s*$', # pass (optionally followed by trailing spaces)
68 68 r'^\s+break\s*$', # break (optionally followed by trailing spaces)
69 69 r'^\s+continue\s*$', # continue (optionally followed by trailing spaces)
70 70 ]))
71 71 ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
72 72
73 73 # regexp to match pure comment lines so we don't accidentally insert 'if 1:'
74 74 # before pure comments
75 75 comment_line_re = re.compile(r'^\s*\#')
76 76
77 77
78 78 def num_ini_spaces(s):
79 79 """Return the number of initial spaces in a string.
80 80
81 81 Note that tabs are counted as a single space. For now, we do *not* support
82 82 mixing of tabs and spaces in the user's input.
83 83
84 84 Parameters
85 85 ----------
86 86 s : string
87 87
88 88 Returns
89 89 -------
90 90 n : int
91 91 """
92 92
93 93 ini_spaces = ini_spaces_re.match(s)
94 94 if ini_spaces:
95 95 return ini_spaces.end()
96 96 else:
97 97 return 0
98 98
99 99 # Fake token types for partial_tokenize:
100 100 INCOMPLETE_STRING = tokenize.N_TOKENS
101 101 IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1
102 102
103 103 # The 2 classes below have the same API as TokenInfo, but don't try to look up
104 104 # a token type name that they won't find.
105 105 class IncompleteString:
106 106 type = exact_type = INCOMPLETE_STRING
107 107 def __init__(self, s, start, end, line):
108 108 self.s = s
109 109 self.start = start
110 110 self.end = end
111 111 self.line = line
112 112
113 113 class InMultilineStatement:
114 114 type = exact_type = IN_MULTILINE_STATEMENT
115 115 def __init__(self, pos, line):
116 116 self.s = ''
117 117 self.start = self.end = pos
118 118 self.line = line
119 119
120 120 def partial_tokens(s):
121 121 """Iterate over tokens from a possibly-incomplete string of code.
122 122
123 123 This adds two special token types: INCOMPLETE_STRING and
124 124 IN_MULTILINE_STATEMENT. These can only occur as the last token yielded, and
125 125 represent the two main ways for code to be incomplete.
126 126 """
127 127 readline = io.StringIO(s).readline
128 128 token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
129 129 try:
130 130 for token in tokenize.generate_tokens(readline):
131 131 yield token
132 132 except tokenize.TokenError as e:
133 133 # catch EOF error
134 134 lines = s.splitlines(keepends=True)
135 135 end = len(lines), len(lines[-1])
136 136 if 'multi-line string' in e.args[0]:
137 137 l, c = start = token.end
138 138 s = lines[l-1][c:] + ''.join(lines[l:])
139 139 yield IncompleteString(s, start, end, lines[-1])
140 140 elif 'multi-line statement' in e.args[0]:
141 141 yield InMultilineStatement(end, lines[-1])
142 142 else:
143 143 raise
144 144
145 145 def find_next_indent(code):
146 146 """Find the number of spaces for the next line of indentation"""
147 147 tokens = list(partial_tokens(code))
148 148 if tokens[-1].type == tokenize.ENDMARKER:
149 149 tokens.pop()
150 150 if not tokens:
151 151 return 0
152 152 while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
153 153 tokens.pop()
154 154
155 155 if tokens[-1].type == INCOMPLETE_STRING:
156 156 # Inside a multiline string
157 157 return 0
158 158
159 159 # Find the indents used before
160 160 prev_indents = [0]
161 161 def _add_indent(n):
162 162 if n != prev_indents[-1]:
163 163 prev_indents.append(n)
164 164
165 165 tokiter = iter(tokens)
166 166 for tok in tokiter:
167 167 if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
168 168 _add_indent(tok.end[1])
169 169 elif (tok.type == tokenize.NL):
170 170 try:
171 171 _add_indent(next(tokiter).start[1])
172 172 except StopIteration:
173 173 break
174 174
175 175 last_indent = prev_indents.pop()
176 176
177 177 # If we've just opened a multiline statement (e.g. 'a = ['), indent more
178 178 if tokens[-1].type == IN_MULTILINE_STATEMENT:
179 179 if tokens[-2].exact_type in {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
180 180 return last_indent + 4
181 181 return last_indent
182 182
183 183 if tokens[-1].exact_type == tokenize.COLON:
184 184 # Line ends with colon - indent
185 185 return last_indent + 4
186 186
187 187 if last_indent:
188 188 # Examine the last line for dedent cues - statements like return or
189 189 # raise which normally end a block of code.
190 190 last_line_starts = 0
191 191 for i, tok in enumerate(tokens):
192 192 if tok.type == tokenize.NEWLINE:
193 193 last_line_starts = i + 1
194 194
195 195 last_line_tokens = tokens[last_line_starts:]
196 196 names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
197 197 if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
198 198 # Find the most recent indentation less than the current level
199 199 for indent in reversed(prev_indents):
200 200 if indent < last_indent:
201 201 return indent
202 202
203 203 return last_indent
204 204
205 205
206 206 def last_blank(src):
207 207 """Determine if the input source ends in a blank.
208 208
209 209 A blank is either a newline or a line consisting of whitespace.
210 210
211 211 Parameters
212 212 ----------
213 213 src : string
214 214 A single or multiline string.
215 215 """
216 216 if not src: return False
217 217 ll = src.splitlines()[-1]
218 218 return (ll == '') or ll.isspace()
219 219
220 220
221 221 last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
222 222 last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)
223 223
224 224 def last_two_blanks(src):
225 225 """Determine if the input source ends in two blanks.
226 226
227 227 A blank is either a newline or a line consisting of whitespace.
228 228
229 229 Parameters
230 230 ----------
231 231 src : string
232 232 A single or multiline string.
233 233 """
234 234 if not src: return False
235 235 # The logic here is tricky: I couldn't get a regexp to work and pass all
236 236 # the tests, so I took a different approach: split the source by lines,
237 237 # grab the last two and prepend '###\n' as a stand-in for whatever was in
238 238 # the body before the last two lines. Then, with that structure, it's
239 239 # possible to analyze with two regexps. Not the most elegant solution, but
240 240 # it works. If anyone tries to change this logic, make sure to validate
241 241 # the whole test suite first!
242 242 new_src = '\n'.join(['###\n'] + src.splitlines()[-2:])
243 243 return (bool(last_two_blanks_re.match(new_src)) or
244 244 bool(last_two_blanks_re2.match(new_src)) )
245 245
246 246
247 247 def remove_comments(src):
248 248 """Remove all comments from input source.
249 249
250 250 Note: this is purely textual -- a '#' inside a string literal is NOT recognized as such, and everything from it to the end of the line is removed.
251 251
252 252 Parameters
253 253 ----------
254 254 src : string
255 255 A single or multiline input string.
256 256
257 257 Returns
258 258 -------
259 259 String with all Python comments removed.
260 260 """
261 261
262 262 return re.sub('#.*', '', src)
263 263
264 264
265 265 def get_input_encoding():
266 266 """Return the default standard input encoding.
267 267
268 268 If sys.stdin has no encoding, 'ascii' is returned."""
269 269 # There are strange environments for which sys.stdin.encoding is None. We
270 270 # ensure that a valid encoding is returned.
271 271 encoding = getattr(sys.stdin, 'encoding', None)
272 272 if encoding is None:
273 273 encoding = 'ascii'
274 274 return encoding
275 275
276 276 #-----------------------------------------------------------------------------
277 277 # Classes and functions for normal Python syntax handling
278 278 #-----------------------------------------------------------------------------
279 279
280 280 class InputSplitter(object):
281 281 r"""An object that can accumulate lines of Python source before execution.
282 282
283 283 This object is designed to be fed python source line-by-line, using
284 284 :meth:`push`. It will return on each push whether the currently pushed
285 285 code could be executed already. In addition, it provides a method called
286 286 :meth:`push_accepts_more` that can be used to query whether more input
287 287 can be pushed into a single interactive block.
288 288
289 289 This is a simple example of how an interactive terminal-based client can use
290 290 this tool::
291 291
292 292 isp = InputSplitter()
293 293 while isp.push_accepts_more():
294 294 indent = ' '*isp.indent_spaces
295 295 prompt = '>>> ' + indent
296 296 line = indent + input(prompt)
297 297 isp.push(line)
298 298 print('Input source was:\n', isp.source_reset(), end='')
299 299 """
300 300 # A cache for storing the current indentation
301 301 # The first value stores the most recently processed source input
302 302 # The second value is the number of spaces for the current indentation
303 303 # If self.source matches the first value, the second value is a valid
304 304 # current indentation. Otherwise, the cache is invalid and the indentation
305 305 # must be recalculated.
306 306 _indent_spaces_cache = None, None
307 307 # String, indicating the default input encoding. It is computed by default
308 308 # at initialization time via get_input_encoding(), but it can be reset by a
309 309 # client with specific knowledge of the encoding.
310 310 encoding = ''
311 311 # String where the current full source input is stored, properly encoded.
312 312 # Reading this attribute is the normal way of querying the currently pushed
313 313 # source code, that has been properly encoded.
314 314 source = ''
315 315 # Code object corresponding to the current source. It is automatically
316 316 # synced to the source, so it can be queried at any time to obtain the code
317 317 # object; it will be None if the source doesn't compile to valid Python.
318 318 code = None
319 319
320 320 # Private attributes
321 321
322 322 # List with lines of input accumulated so far
323 323 _buffer = None
324 324 # Command compiler
325 325 _compile = None
326 326 # Boolean indicating whether the current block is complete
327 327 _is_complete = None
328 328 # Boolean indicating whether the current block has an unrecoverable syntax error
329 329 _is_invalid = False
330 330
331 331 def __init__(self):
332 332 """Create a new InputSplitter instance.
333 333 """
334 334 self._buffer = []
335 335 self._compile = codeop.CommandCompiler()
336 336 self.encoding = get_input_encoding()
337 337
338 338 def reset(self):
339 339 """Reset the input buffer and associated state."""
340 340 self._buffer[:] = []
341 341 self.source = ''
342 342 self.code = None
343 343 self._is_complete = False
344 344 self._is_invalid = False
345 345
346 346 def source_reset(self):
347 347 """Return the input source and perform a full reset.
348 348 """
349 349 out = self.source
350 350 self.reset()
351 351 return out
352 352
353 353 def check_complete(self, source):
354 354 """Return whether a block of code is ready to execute, or should be continued
355 355
356 356 This is a non-stateful API, and will reset the state of this InputSplitter.
357 357
358 358 Parameters
359 359 ----------
360 360 source : string
361 361 Python input code, which can be multiline.
362 362
363 363 Returns
364 364 -------
365 365 status : str
366 366 One of 'complete', 'incomplete', or 'invalid' if source is not a
367 367 prefix of valid code.
368 368 indent_spaces : int or None
369 369 The number of spaces by which to indent the next line of code. If
370 370 status is not 'incomplete', this is None.
371 371 """
372 372 self.reset()
373 373 try:
374 374 self.push(source)
375 375 except SyntaxError:
376 376 # Transformers in IPythonInputSplitter can raise SyntaxError,
377 377 # which push() will not catch.
378 378 return 'invalid', None
379 379 else:
380 380 if self._is_invalid:
381 381 return 'invalid', None
382 382 elif self.push_accepts_more():
383 383 return 'incomplete', self.get_indent_spaces()
384 384 else:
385 385 return 'complete', None
386 386 finally:
387 387 self.reset()
388 388
389 def push(self, lines):
389 def push(self, lines:str) -> bool:
390 390 """Push one or more lines of input.
391 391
392 392 This stores the given lines and returns a status code indicating
393 393 whether the code forms a complete Python block or not.
394 394
395 395 Any exceptions generated in compilation are swallowed, but if an
396 396 exception was produced, the method returns True.
397 397
398 398 Parameters
399 399 ----------
400 400 lines : string
401 401 One or more lines of Python input.
402 402
403 403 Returns
404 404 -------
405 405 is_complete : boolean
406 406 True if the current input source (the result of the current input
407 407 plus prior inputs) forms a complete Python execution block. Note that
408 408 this value is also stored as a private attribute (``_is_complete``), so it
409 409 can be queried at any time.
410 410 """
411 assert isinstance(lines, str)
411 412 self._store(lines)
412 413 source = self.source
413 414
414 415 # Before calling _compile(), reset the code object to None so that if an
415 416 # exception is raised in compilation, we don't mislead by having
416 417 # inconsistent code/source attributes.
417 418 self.code, self._is_complete = None, None
418 419 self._is_invalid = False
419 420
420 421 # Honor termination lines properly
421 422 if source.endswith('\\\n'):
422 423 return False
423 424
424 425 try:
425 426 with warnings.catch_warnings():
426 427 warnings.simplefilter('error', SyntaxWarning)
427 428 self.code = self._compile(source, symbol="exec")
428 429 # Invalid syntax can produce any of a number of different errors from
429 430 # inside the compiler, so we have to catch them all. Syntax errors
430 431 # immediately produce a 'ready' block, so the invalid Python can be
431 432 # sent to the kernel for evaluation with possible ipython
432 433 # special-syntax conversion.
433 434 except (SyntaxError, OverflowError, ValueError, TypeError,
434 435 MemoryError, SyntaxWarning):
435 436 self._is_complete = True
436 437 self._is_invalid = True
437 438 else:
438 439 # Compilation didn't produce any exceptions (though it may not have
439 440 # given a complete code object)
440 441 self._is_complete = self.code is not None
441 442
442 443 return self._is_complete
443 444
444 445 def push_accepts_more(self):
445 446 """Return whether a block of interactive input can accept more input.
446 447
447 448 This method is meant to be used by line-oriented frontends, who need to
448 449 guess whether a block is complete or not based solely on prior and
449 450 current input lines. The InputSplitter considers it has a complete
450 451 interactive block and will not accept more input when either:
451 452
452 453 * A SyntaxError is raised
453 454
454 455 * The code is complete and consists of a single line or a single
455 456 non-compound statement
456 457
457 458 * The code is complete and has a blank line at the end
458 459
459 460 If the current input produces a syntax error, this method immediately
460 461 returns False but does *not* raise the syntax error exception, as
461 462 typically clients will want to send invalid syntax to an execution
462 463 backend which might convert the invalid syntax into valid Python via
463 464 one of the dynamic IPython mechanisms.
464 465 """
465 466
466 467 # With incomplete input, unconditionally accept more
467 468 # A syntax error also sets _is_complete to True - see push()
468 469 if not self._is_complete:
469 470 #print("Not complete") # debug
470 471 return True
471 472
472 473 # The user can make any (complete) input execute by leaving a blank line
473 474 last_line = self.source.splitlines()[-1]
474 475 if (not last_line) or last_line.isspace():
475 476 #print("Blank line") # debug
476 477 return False
477 478
478 479 # If there's just a single line or AST node, and we're flush left, as is
479 480 # the case after a simple statement such as 'a=1', we want to execute it
480 481 # straight away.
481 482 if self.get_indent_spaces() == 0:
482 483 if len(self.source.splitlines()) <= 1:
483 484 return False
484 485
485 486 try:
486 487 code_ast = ast.parse(u''.join(self._buffer))
487 488 except Exception:
488 489 #print("Can't parse AST") # debug
489 490 return False
490 491 else:
491 492 if len(code_ast.body) == 1 and \
492 493 not hasattr(code_ast.body[0], 'body'):
493 494 #print("Simple statement") # debug
494 495 return False
495 496
496 497 # General fallback - accept more code
497 498 return True
498 499
499 500 def get_indent_spaces(self):
500 501 sourcefor, n = self._indent_spaces_cache
501 502 if sourcefor == self.source:
502 503 return n
503 504
504 505 # self.source always has a trailing newline
505 506 n = find_next_indent(self.source[:-1])
506 507 self._indent_spaces_cache = (self.source, n)
507 508 return n
508 509
509 510 # Backwards compatibility. I think all code that used .indent_spaces was
510 511 # inside IPython, but we can leave this here until IPython 7 in case any
511 512 # other modules are using it. -TK, November 2017
512 513 indent_spaces = property(get_indent_spaces)
513 514
514 515 def _store(self, lines, buffer=None, store='source'):
515 516 """Store one or more lines of input.
516 517
517 518 If input lines are not newline-terminated, a newline is automatically
518 519 appended."""
519 520
520 521 if buffer is None:
521 522 buffer = self._buffer
522 523
523 524 if lines.endswith('\n'):
524 525 buffer.append(lines)
525 526 else:
526 527 buffer.append(lines+'\n')
527 528 setattr(self, store, self._set_source(buffer))
528 529
529 530 def _set_source(self, buffer):
530 531 return u''.join(buffer)
531 532
532 533
533 534 class IPythonInputSplitter(InputSplitter):
534 535 """An input splitter that recognizes all of IPython's special syntax."""
535 536
536 537 # String with raw, untransformed input.
537 538 source_raw = ''
538 539
539 540 # Flag to track when a transformer has stored input that it hasn't given
540 541 # back yet.
541 542 transformer_accumulating = False
542 543
543 544 # Flag to track when assemble_python_lines has stored input that it hasn't
544 545 # given back yet.
545 546 within_python_line = False
546 547
547 548 # Private attributes
548 549
549 550 # List with lines of raw input accumulated so far.
550 551 _buffer_raw = None
551 552
552 553 def __init__(self, line_input_checker=True, physical_line_transforms=None,
553 554 logical_line_transforms=None, python_line_transforms=None):
554 555 super(IPythonInputSplitter, self).__init__()
555 556 self._buffer_raw = []
556 557 self._validate = True
557 558
558 559 if physical_line_transforms is not None:
559 560 self.physical_line_transforms = physical_line_transforms
560 561 else:
561 562 self.physical_line_transforms = [
562 563 leading_indent(),
563 564 classic_prompt(),
564 565 ipy_prompt(),
565 566 cellmagic(end_on_blank_line=line_input_checker),
566 567 ]
567 568
568 569 self.assemble_logical_lines = assemble_logical_lines()
569 570 if logical_line_transforms is not None:
570 571 self.logical_line_transforms = logical_line_transforms
571 572 else:
572 573 self.logical_line_transforms = [
573 574 help_end(),
574 575 escaped_commands(),
575 576 assign_from_magic(),
576 577 assign_from_system(),
577 578 ]
578 579
579 580 self.assemble_python_lines = assemble_python_lines()
580 581 if python_line_transforms is not None:
581 582 self.python_line_transforms = python_line_transforms
582 583 else:
583 584 # We don't use any of these at present
584 585 self.python_line_transforms = []
585 586
586 587 @property
587 588 def transforms(self):
588 589 "Quick access to all transformers."
589 590 return self.physical_line_transforms + \
590 591 [self.assemble_logical_lines] + self.logical_line_transforms + \
591 592 [self.assemble_python_lines] + self.python_line_transforms
592 593
593 594 @property
594 595 def transforms_in_use(self):
595 596 """Transformers, excluding logical line transformers if we're in a
596 597 Python line."""
597 598 t = self.physical_line_transforms[:]
598 599 if not self.within_python_line:
599 600 t += [self.assemble_logical_lines] + self.logical_line_transforms
600 601 return t + [self.assemble_python_lines] + self.python_line_transforms
601 602
602 603 def reset(self):
603 604 """Reset the input buffer and associated state."""
604 605 super(IPythonInputSplitter, self).reset()
605 606 self._buffer_raw[:] = []
606 607 self.source_raw = ''
607 608 self.transformer_accumulating = False
608 609 self.within_python_line = False
609 610
610 611 for t in self.transforms:
611 612 try:
612 613 t.reset()
613 614 except SyntaxError:
614 615 # Nothing that calls reset() expects to handle transformer
615 616 # errors
616 617 pass
617 618
618 619 def flush_transformers(self):
619 620 def _flush(transform, outs):
620 621 """yield transformed lines
621 622
622 623 always strings, never None
623 624
624 625 transform: the current transform
625 626 outs: an iterable of previously transformed inputs.
626 627 Each may be multiline, which will be passed
627 628 one line at a time to transform.
628 629 """
629 630 for out in outs:
630 631 for line in out.splitlines():
631 632 # push one line at a time
632 633 tmp = transform.push(line)
633 634 if tmp is not None:
634 635 yield tmp
635 636
636 637 # reset the transform
637 638 tmp = transform.reset()
638 639 if tmp is not None:
639 640 yield tmp
640 641
641 642 out = []
642 643 for t in self.transforms_in_use:
643 644 out = _flush(t, out)
644 645
645 646 out = list(out)
646 647 if out:
647 648 self._store('\n'.join(out))
648 649
649 650 def raw_reset(self):
650 651 """Return raw input only and perform a full reset.
651 652 """
652 653 out = self.source_raw
653 654 self.reset()
654 655 return out
655 656
656 657 def source_reset(self):
657 658 try:
658 659 self.flush_transformers()
659 660 return self.source
660 661 finally:
661 662 self.reset()
662 663
663 664 def push_accepts_more(self):
664 665 if self.transformer_accumulating:
665 666 return True
666 667 else:
667 668 return super(IPythonInputSplitter, self).push_accepts_more()
668 669
669 670 def transform_cell(self, cell):
670 671 """Process and translate a cell of input.
671 672 """
672 673 self.reset()
673 674 try:
674 675 self.push(cell)
675 676 self.flush_transformers()
676 677 return self.source
677 678 finally:
678 679 self.reset()
679 680
680 def push(self, lines):
681 def push(self, lines:str) -> bool:
681 682 """Push one or more lines of IPython input.
682 683
683 684 This stores the given lines and returns a status code indicating
684 685 whether the code forms a complete Python block or not, after processing
685 686 all input lines for special IPython syntax.
686 687
687 688 Any exceptions generated in compilation are swallowed, but if an
688 689 exception was produced, the method returns True.
689 690
690 691 Parameters
691 692 ----------
692 693 lines : string
693 694 One or more lines of Python input.
694 695
695 696 Returns
696 697 -------
697 698 is_complete : boolean
698 699 True if the current input source (the result of the current input
699 700 plus prior inputs) forms a complete Python execution block. Note that
700 701 this value is also stored as a private attribute (_is_complete), so it
701 702 can be queried at any time.
702 703 """
703
704 assert isinstance(lines, str)
704 705 # We must ensure all input is pure unicode
705 lines = cast_unicode(lines, self.encoding)
706 706 # ''.splitlines() --> [], but we need to push the empty line to transformers
707 707 lines_list = lines.splitlines()
708 708 if not lines_list:
709 709 lines_list = ['']
710 710
711 711 # Store raw source before applying any transformations to it. Note
712 712 # that this must be done *after* the reset() call that would otherwise
713 713 # flush the buffer.
714 714 self._store(lines, self._buffer_raw, 'source_raw')
715 715
716 716 transformed_lines_list = []
717 717 for line in lines_list:
718 718 transformed = self._transform_line(line)
719 719 if transformed is not None:
720 720 transformed_lines_list.append(transformed)
721 721
722 722 if transformed_lines_list:
723 723 transformed_lines = '\n'.join(transformed_lines_list)
724 724 return super(IPythonInputSplitter, self).push(transformed_lines)
725 725 else:
726 726 # Got nothing back from transformers - they must be waiting for
727 727 # more input.
728 728 return False
729 729
730 730 def _transform_line(self, line):
731 731 """Push a line of input code through the various transformers.
732 732
733 733 Returns any output from the transformers, or None if a transformer
734 734 is accumulating lines.
735 735
736 736 Sets self.transformer_accumulating as a side effect.
737 737 """
738 738 def _accumulating(dbg):
739 739 #print(dbg)
740 740 self.transformer_accumulating = True
741 741 return None
742 742
743 743 for transformer in self.physical_line_transforms:
744 744 line = transformer.push(line)
745 745 if line is None:
746 746 return _accumulating(transformer)
747 747
748 748 if not self.within_python_line:
749 749 line = self.assemble_logical_lines.push(line)
750 750 if line is None:
751 751 return _accumulating('acc logical line')
752 752
753 753 for transformer in self.logical_line_transforms:
754 754 line = transformer.push(line)
755 755 if line is None:
756 756 return _accumulating(transformer)
757 757
758 758 line = self.assemble_python_lines.push(line)
759 759 if line is None:
760 760 self.within_python_line = True
761 761 return _accumulating('acc python line')
762 762 else:
763 763 self.within_python_line = False
764 764
765 765 for transformer in self.python_line_transforms:
766 766 line = transformer.push(line)
767 767 if line is None:
768 768 return _accumulating(transformer)
769 769
770 770 #print("transformers clear") #debug
771 771 self.transformer_accumulating = False
772 772 return line
773 773
@@ -1,772 +1,772 b''
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for working with strings and text.
4 4
5 5 Inheritance diagram:
6 6
7 7 .. inheritance-diagram:: IPython.utils.text
8 8 :parts: 3
9 9 """
10 10
11 11 import os
12 12 import re
13 13 import sys
14 14 import textwrap
15 15 from string import Formatter
16 16 from pathlib import Path
17 17
18 18 from IPython.utils import py3compat
19 19
# datetime.strftime pattern used for dates in IPython.  Windows gets the
# zero-padded day ("%d"); every other platform uses "%-d" (presumably because
# the "-" flag is not accepted by the Windows C runtime -- confirm).
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
25 25
class LSString(str):
    """A ``str`` subclass that exposes shell-friendly views of its value.

    Instances are ordinary strings with these extra read-only attributes:

        .l (or .list) : the value split on newlines, as a list.
        .n (or .nlstr): the original value (the string itself).
        .s (or .spstr): the value with each newline replaced by a space.
        .p (or .paths): ``pathlib.Path`` objects for the lines that name an
                        existing filesystem entry.

    Views that need a transformation are computed on first access and
    cached on the instance.

    Such strings are handy when talking to the shell, which typically only
    understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the value split on newlines (computed once, then cached)."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the value with newlines turned into spaces (cached)."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself; it is already newline-separated."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the lines that exist on disk (cached)."""
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                Path(entry)
                for entry in filter(os.path.exists, self.split('\n'))
            ]
        return found

    p = paths = property(get_paths)
73 73
74 74 # FIXME: We need to reimplement type specific displayhook and then add this
75 75 # back as a custom printer. This should also be moved outside utils into the
76 76 # core.
77 77
78 78 # def print_lsstring(arg):
79 79 # """ Prettier (non-repr-like) and more informative printer for LSString """
80 80 # print "LSString (.p, .n, .l, .s available). Value:"
81 81 # print arg
82 82 #
83 83 #
84 84 # print_lsstring = result_display.register(LSString)(print_lsstring)
85 85
86 86
class SList(list):
    """A ``list`` subclass that exposes shell-friendly views of its items.

    Instances are ordinary lists (of strings) with these extra attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): ``pathlib.Path`` objects for the items that name an
      existing filesystem entry.

    Views that need a transformation are computed on first access and
    cached on the instance (later mutations of the list are not reflected
    in an already-cached view)."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively.  A callable is
        used as the predicate directly.  If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field (items lacking that field are matched
        against the empty string).

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Select the text the predicate is applied to.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, str):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line with none of the requested fields
        contributes nothing to the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Unlike ``list.sort`` this does not sort in place: a new SList is
        returned and the receiver is left untouched.

        Parameters
        ----------
        field : int, optional
            Index of the whitespace-separated field to sort on.  When
            omitted, whole lines are compared.
        nums : bool (default: False)
            Sort numerically: the digits found in the key are concatenated
            and read as one integer (0 when the key has no digits).

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        #decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                # With a field selected the key is an SList of tokens, not
                # a string.  Flatten it first so digits are picked out
                # character by character; otherwise a token such as "18K"
                # fails isdigit() as a whole and is keyed as 0.
                text = " ".join(entry[0]) if field is not None else entry[0]
                numstr = "".join([ch for ch in text if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])
231 231
232 232
233 233 # FIXME: We need to reimplement type specific displayhook and then add this
234 234 # back as a custom printer. This should also be moved outside utils into the
235 235 # core.
236 236
237 237 # def print_slist(arg):
238 238 # """ Prettier (non-repr-like) and more informative printer for SList """
239 239 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240 240 # if hasattr(arg, 'hideonce') and arg.hideonce:
241 241 # arg.hideonce = False
242 242 # return
243 243 #
244 244 # nlprint(arg) # This was a nested list printer, now removed.
245 245 #
246 246 # print_slist = result_display.register(SList)(print_slist)
247 247
248 248
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : str or None
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented (tabs come before the spaces).
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str : string indented by ntabs and nspaces, or None when `instr` is
    None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches after a final newline, so text
    # ending in a newline picks up a dangling indent; drop it.  Two guards:
    #  * `ind` must be non-empty, because outstr[:-0] is the empty string
    #    and would discard the whole result;
    #  * compare against '\n' rather than os.linesep, since '^' breaks on
    #    '\n' whatever the platform ('\r\n' endings are covered as well).
    if ind and outstr.endswith('\n'+ind):
        return outstr[:-len(ind)]
    else:
        return outstr
286 286
287 287
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back untouched.

    Lets callers accept either a single string or a list of strings and
    treat the result as a list.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
310 310
311 311
def marquee(txt='',width=78,mark='*'):
    """Return `txt` centred between two equal runs of `mark`.

    With empty `txt` the result is just `mark` repeated and cut to
    `width` characters.  Otherwise the text is surrounded by one space
    and as many whole marks as fit on each side (possibly none).

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Room left after the text and its two padding spaces, in whole marks,
    # split evenly between both sides; never negative.
    per_side = max((width-len(txt)-2)//len(mark)//2, 0)
    border = mark*per_side
    return '%s %s %s' % (border,txt,border)
335 335
336 336
# Leading run of whitespace at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string.

    Any character matched by the regex class ``\\s`` counts (tabs
    included), not only the space character.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
347 347
348 348
def format_screen(strng):
    """Format a string for screen printing.

    Strips the backslash that ends a line (a latex-style paragraph
    continuation marker); everything else is left alone.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
357 357
358 358
def dedent(text):
    """Like textwrap.dedent, but an unindented first line is left out of it.

    A string such as::

        '''foo
        is a bar
        '''

    would defeat textwrap.dedent because its first line has no
    indentation.  Here the first line is kept as is and only the
    remaining lines are dedented.  Text that starts with a newline, or
    that has a single line, goes through textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    if text.startswith('\n'):
        # the first line is blank, so it cannot hide the indentation
        return textwrap.dedent(text)

    head, newline, tail = text.partition('\n')
    if not newline:
        # a single line: nothing to protect
        return textwrap.dedent(text)

    return head + '\n' + textwrap.dedent(tail)
384 384
385 385
def wrap_paragraphs(text, ncols=80):
    """Wrap each paragraph of `text` to at most `ncols` columns.

    Works like textwrap.fill applied paragraph by paragraph, where
    paragraphs are separated by one or more blank lines.  The text is
    dedented and stripped first.  A paragraph that still contains an
    indented line after that is presumed to be deliberately formatted and
    is passed through unwrapped.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    body = dedent(text).strip()
    # The separator pattern has a capturing group, so split() interleaves
    # the captured blank runs with the paragraphs; keep the even slots.
    paragraphs = paragraph_re.split(body)[::2]
    return [
        textwrap.fill(par, ncols) if indent_re.search(par) is None else par
        for par in paragraphs
    ]
410 410
411 411
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Candidates are taken from the first string; when several share the
    maximal length, the one starting earliest in it wins.  A one-element
    list yields that element; an empty list, or an empty first string,
    yields ''.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]
    best = ''
    if len(data) < 2 or len(data[0]) == 0:
        return best
    reference = data[0]
    for start in range(len(reference)):
        # only slices strictly longer than the current best can improve it
        for stop in range(start + len(best) + 1, len(reference) + 1):
            candidate = reference[start:stop]
            if all(candidate in other for other in data):
                best = candidate
    return best
426 426
427 427
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* at the start of all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the text with the shared quote prefix removed from every line
    (lines re-joined with '\\n'), or `text` itself when nothing is
    stripped.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix is None:
            # an unquoted line: leave the whole text alone
            return text
        matches.add(prefix.group(1))

    # What gets cut must be a shared *prefix*.  A longest common substring
    # can sit at different offsets in different lines, and cutting that
    # many leading characters then removes the wrong ones.
    common = os.path.commonprefix(list(matches))
    # A shared run of bare whitespace is not a quotation mark.
    if '>' in common:
        strip = len(common)
        text = '\n'.join([ln[strip:] for ln in lines])
    return text
474 474
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ESC ``[`` digits-and-semicolons ``m`` are
    removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    ansi_code = re.compile(r'\033\[(\d|;)+?m')
    return ansi_code.sub('', source)
485 485
486 486
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression with the
    keyword arguments as its namespace.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    NOTE(review): fields go through ``eval``; only use this with format
    strings from a trusted source.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate `name` against `kwargs`; return ``(value, name)``.

        Positional `args` are not consulted.
        """
        return eval(name, kwargs), name
511 511
512 512 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
513 513 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
514 514 # above, it should be possible to remove FullEvalFormatter.
515 515
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression with the
    keyword arguments as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    NOTE(review): fields go through ``eval``; only use this with format
    strings from a trusted source.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def vformat(self, format_string: str, args, kwargs) -> str:
        """Render `format_string`, evaluating each field against `kwargs`.

        Positional `args` are accepted for signature compatibility with
        ``Formatter.vformat`` but are not consulted.
        """
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing: whatever the
                    # parser split off after ':' is glued back on so the
                    # full expression is evaluated
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, ''))

        # every piece is already a str, so a plain join is sufficient
        return ''.join(result)
570 570
571 571
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign (``$$foo``) is emitted as the literal ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")

    def parse(self, fmt_string):
        """Yield ``(literal, field, spec, conversion)`` tuples for `fmt_string`.

        The string is first split on ``{...}`` fields by the stock parser;
        each literal stretch is then scanned for ``$name`` references,
        which are yielded as extra fields with an empty spec and no
        conversion.
        """
        brace_fields = Formatter.parse(self, fmt_string)
        for literal_txt, field_name, format_spec, conversion in brace_fields:

            # Find $foo patterns in the literal text.
            consumed = 0
            pending = ""
            for hit in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
                before, dollar_name = hit.group(1,2)
                consumed = hit.end()
                if dollar_name.startswith("$"):
                    # $$foo --> $foo: keep it as literal text
                    pending += before + dollar_name
                    continue
                yield (pending + before, dollar_name, "", None)
                pending = ""

            # Re-yield the {foo} style pattern
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)
611 611
612 612 #-----------------------------------------------------------------------------
613 613 # Utils to columnize a list of string
614 614 #-----------------------------------------------------------------------------
615 615
def _col_chunks(l, max_rows, row_first=False):
    """Yield the columns of `l` when laid out with at most `max_rows` rows.

    Column-first (the default): consecutive slices of `max_rows` items.
    Row-first: column ``i`` holds every ``ncols``-th item starting at
    ``i``, ``ncols`` being the number of columns needed for `max_rows`.
    """
    total = len(l)
    if not row_first:
        for start in range(0, total, max_rows):
            yield l[start:(start + max_rows)]
        return
    whole, leftover = divmod(total, max_rows)
    ncols = whole + (leftover > 0)
    for col in range(ncols):
        yield [l[pos] for pos in range(col, total, ncols)]
625 625
626 626
def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
    """Calculate optimal info to columnize a list of string

    Parameters
    ----------
    rlist : list of int
        Width of every item to lay out.
    row_first : bool (default: False)
        Passed through to `_col_chunks` to pick the layout direction.
    separator_size : int (default: 2)
        Width reserved between two adjacent columns.
    displaywidth : int (default: 80)
        Total width the columns must fit in.

    Returns
    -------
    dict with keys ``num_columns``, ``optimal_separator_width``,
    ``max_rows`` and ``column_widths``.  For an empty `rlist` all counts
    are 0 and ``column_widths`` is an empty list.
    """
    if len(rlist) == 0:
        # Nothing to lay out.  Without this guard the loop below never
        # runs and its variables are unbound at the return statement.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
               }
    # Try the fewest rows first and stop at the first layout that fits;
    # if none fits, the last one tried (one single column) is kept.
    for max_rows in range(1, len(rlist) + 1):
        col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break
    return {'num_columns': ncols,
            'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
            'max_rows': max_rows,
            'column_widths': col_widths
           }
640 640
641 641
def _get_or_default(mylist, i, default=None):
    """Return ``mylist[i]``, or `default` when `i` is past the end."""
    return default if i >= len(mylist) else mylist[i]
648 648
649 649
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------

    strings_matrix

        nested list of strings; the outermost list contains as many lists
        as there are rows, and each inner list has as many elements as
        there are columns. If the total number of elements in `items` does
        not equal the product of rows*columns, the missing cells are
        filled with `empty`.

    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, row_first, *args, **kwargs)
    nrow, ncol = info['max_rows'], info['num_columns']

    def flat_index(r, c):
        # position in `items` of the cell at row r, column c
        return r * ncol + c if row_first else c * nrow + r

    matrix = [
        [_get_or_default(items, flat_index(r, c), default=empty) for c in range(ncol)]
        for r in range(nrow)
    ]
    return (matrix, info)
708 708
709 709
def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        If True, the separator is right-padded with spaces up to the
        ``optimal_separator_width`` reported by `compute_item_matrix`.

    Returns
    -------
    The formatted string, one line per row, ending with a newline.  An
    empty `items` yields a lone newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    # drop the filler cells (and any other falsy cell) from each row
    fmatrix = [filter(None, x) for x in matrix]
    sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
    return '\n'.join(map(sjoin, fmatrix))+'\n'
740 740
741 741
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  When `wrap_item_with` is non-empty it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''
    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                 for entry in list_]
    if count == 1:
        return list_[0]
    leading, final = list_[:-1], list_[-1]
    return '%s%s%s' % (sep.join(leading), last_sep, final)
General Comments 0
You need to be logged in to leave comments. Login now