##// END OF EJS Templates
Warn that block transformer will break complete-input detection....
Matthias Bussonnier -
Show More
@@ -1,741 +1,743 b''
1 1 """Input handling and transformation machinery.
2 2
3 3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 4 input from a line-oriented frontend is complete and should be executed, and when
5 5 the user should be prompted for another line of code instead. The name 'input
6 6 splitter' is largely for historical reasons.
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 12 and stores the results.
13 13
14 14 For more details, see the class docstrings below.
15 15 """
16 16
17 17 # Copyright (c) IPython Development Team.
18 18 # Distributed under the terms of the Modified BSD License.
19 19 import ast
20 20 import codeop
21 21 import io
22 22 import re
23 23 import sys
24 24 import tokenize
25 25 import warnings
26 26
27 from IPython.utils.py3compat import cast_unicode
27 28 from IPython.core.inputtransformer import (leading_indent,
28 29 classic_prompt,
29 30 ipy_prompt,
30 31 cellmagic,
31 32 assemble_logical_lines,
32 33 help_end,
33 34 escaped_commands,
34 35 assign_from_magic,
35 36 assign_from_system,
36 37 assemble_python_lines,
37 38 )
38 39
39 40 # These are available in this module for backwards compatibility.
40 41 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
41 42 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
42 43 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
43 44
44 45 #-----------------------------------------------------------------------------
45 46 # Utilities
46 47 #-----------------------------------------------------------------------------
47 48
48 49 # FIXME: These are general-purpose utilities that later can be moved to the
49 50 # general ward. Kept here for now because we're being very strict about test
50 51 # coverage with this code, and this lets us ensure that we keep 100% coverage
51 52 # while developing.
52 53
# Compiled regexps used for autoindent management.

# Matches an *indented* line holding a statement that normally ends a block:
# raise / return (bare, followed by whitespace, or with an immediately opened
# parenthesis), and pass / break / continue optionally followed by spaces.
dedent_re = re.compile(
    r'^\s+raise(\s.*)?$'            # raise statement (+ space + other stuff, maybe)
    r'|^\s+raise\([^\)]*\).*$'      # wacky raise with immediate open paren
    r'|^\s+return(\s.*)?$'          # normal return (+ space + other stuff, maybe)
    r'|^\s+return\([^\)]*\).*$'     # wacky return with immediate open paren
    r'|^\s+pass\s*$'                # pass (optionally followed by trailing spaces)
    r'|^\s+break\s*$'               # break (optionally followed by trailing spaces)
    r'|^\s+continue\s*$'            # continue (optionally followed by trailing spaces)
)

# Matches the run of whitespace characters (other than newline) that starts a
# string.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
64 65
# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.
# The pattern is a raw string: in a plain literal, '\s' and '\#' are invalid
# escape sequences that Python warns about (and may reject in the future).
comment_line_re = re.compile(r'^\s*\#')
68 69
69 70
def num_ini_spaces(s):
    """Count the whitespace characters at the very start of a string.

    Every character matched by ``ini_spaces_re`` counts as one, so a tab is
    counted as a single space. Mixing tabs and spaces in the user's input is
    *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, or 0 if there is none.
    """
    leading = ini_spaces_re.match(s)
    # The pattern is anchored at the start, so the end offset of the match is
    # the number of leading whitespace characters.
    return leading.end() if leading else 0
90 91
# Fake token types for partial_tokens. They are numbered from
# tokenize.N_TOKENS upwards.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = INCOMPLETE_STRING + 1


# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token for a string literal that is still open at end of input."""

    exact_type = INCOMPLETE_STRING
    type = exact_type

    def __init__(self, s, start, end, line):
        # s: text of the unfinished string; start/end: (row, col) positions;
        # line: the last physical line of the input.
        self.s, self.line = s, line
        self.start, self.end = start, end


class InMultilineStatement:
    """Pseudo-token for input that ends inside a multi-line statement."""

    exact_type = IN_MULTILINE_STATEMENT
    type = exact_type

    def __init__(self, pos, line):
        # Zero-width token: it starts and ends at the same position and
        # carries no text.
        self.s = ''
        self.start = pos
        self.end = pos
        self.line = line
111 112
def partial_tokens(s):
    """Yield tokens from a string of code that may be incomplete.

    Tokens come from :func:`tokenize.generate_tokens`. If tokenizing stops
    with a :exc:`tokenize.TokenError`, one extra pseudo-token is yielded last:
    an INCOMPLETE_STRING token when the error message mentions
    'multi-line string', or an IN_MULTILINE_STATEMENT token when it mentions
    'multi-line statement'. Any other TokenError is re-raised.
    """
    line_reader = io.StringIO(s).readline
    # Placeholder so that ``last_tok.end`` exists even if the tokenizer fails
    # before producing anything.
    last_tok = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for last_tok in tokenize.generate_tokens(line_reader):
            yield last_tok
    except tokenize.TokenError as err:
        # catch EOF error
        src_lines = s.splitlines(keepends=True)
        eof = (len(src_lines), len(src_lines[-1]))
        message = err.args[0]
        if 'multi-line string' in message:
            # The unfinished string begins where the last good token ended.
            begin = last_tok.end
            row, col = begin
            remainder = src_lines[row - 1][col:] + ''.join(src_lines[row:])
            yield IncompleteString(remainder, begin, eof, src_lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(eof, src_lines[-1])
        else:
            raise
136 137
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : string
        The source entered so far; it may be incomplete.

    Returns
    -------
    n : int
        The indentation suggested for the next line. Indentation is increased
        in steps of 4 after an opening bracket or a trailing colon, and
        reduced to the previous smaller level after a line starting with
        raise, return, pass, break or continue.
    """
    tokens = list(partial_tokens(code))
    if tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Trailing DEDENT/NEWLINE/COMMENT tokens carry no information about the
    # last line of code. The loop checks that the list is non-empty so that an
    # input made up only of such tokens yields 0 instead of an IndexError.
    trailing = {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}
    while tokens and tokens[-1].type in trailing:
        tokens.pop()
    if not tokens:
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]
    def _add_indent(n):
        # Record a level only when it differs from the most recent one.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif (tok.type == tokenize.NL):
            # After a non-logical newline, the start column of the following
            # token is taken as an indentation level; that token is consumed
            # here and not examined again by the loop.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        if tokens[-2].exact_type in {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
196 197
197 198
def last_blank(src):
    """Tell whether the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise True when the last line returned by
        ``src.splitlines()`` is empty or contains only whitespace.
    """
    if not src:
        return False
    *_, final_line = src.splitlines()
    return final_line.isspace() if final_line else True
211 212
212 213
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether either of the two module
        regexps matches a probe string built from the last two lines.
    """
    if not src:
        return False
    # This logic is fragile on purpose: only the last two lines are kept, and
    # '###\n' is put in front of them as a stand-in for whatever came before.
    # The two regexps are then tried, in order, against that probe. Validate
    # against the whole test suite before changing anything here.
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n', *tail])
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
237 238
238 239
def remove_comments(src):
    """Strip every comment from the input source.

    Note: comments are NOT recognized inside of strings! Everything from a
    ``#`` up to (but not including) the next newline is removed, wherever the
    ``#`` appears.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
255 256
256 257
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # Some environments provide a sys.stdin whose encoding is None, or that
    # has no such attribute at all; fall back to a valid encoding then.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
267 268
268 269 #-----------------------------------------------------------------------------
269 270 # Classes and functions for normal Python syntax handling
270 271 #-----------------------------------------------------------------------------
271 272
class InputSplitter(object):
    r"""Accumulate lines of Python source until they can be executed.

    Feed source to this object with :meth:`push`; each call reports whether
    the code pushed so far could already be executed. :meth:`push_accepts_more`
    tells whether more input can be pushed into the same interactive block.

    A simple terminal-based client could use it like this::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset())
    """
    # Number of spaces of indentation computed from the input pushed so far.
    # This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String naming the default input encoding. It is set at initialization
    # time from get_input_encoding(), but a client with specific knowledge of
    # the encoding can overwrite it.
    encoding = ''
    # String holding the full source pushed so far. Reading this attribute is
    # the normal way of querying the currently pushed source code.
    source = ''
    # Code object for the current source; None if the source does not (yet)
    # compile to a complete block of valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None
    # Boolean indicating whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self):
        """Create a new InputSplitter instance."""
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        # The buffer is emptied in place, so the list object stays the same.
        del self._buffer[:]
        self.source = ''
        self.code = None
        self.indent_spaces = 0
        self._full_dedent = False
        self._is_complete = False
        self._is_invalid = False

    def source_reset(self):
        """Return the input source and perform a full reset."""
        text = self.source
        self.reset()
        return text

    def check_complete(self, source):
        """Return whether a block of code is ready to execute, or should be continued

        This is a non-stateful API, and will reset the state of this InputSplitter.

        Parameters
        ----------
        source : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            try:
                self.push(source)
            except SyntaxError:
                # Transformers in IPythonInputSplitter can raise SyntaxError,
                # which push() will not catch.
                return 'invalid', None

            if self._is_invalid:
                return 'invalid', None
            if self.push_accepts_more():
                return 'incomplete', self.indent_spaces
            return 'complete', None
        finally:
            # Whatever happened, leave no state behind.
            self.reset()

    def push(self, lines):
        """Push one or more lines of input.

        The lines are stored, and the whole accumulated source is compiled to
        find out whether it forms a complete Python block.

        Errors raised by the compiler are swallowed; when one occurs the block
        is flagged as invalid and the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block. False
            is returned straight away when the source ends with a backslash
            line continuation. In every other case the value is also stored
            in the private attribute ``_is_complete``, so it can be queried at
            any time.
        """
        self._store(lines)
        source = self.source

        # Clear the previous result before compiling, so that code and source
        # can never be inconsistent if compilation raises.
        self.code = None
        self._is_complete = None
        self._is_invalid = False

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        self._update_indent()
        try:
            with warnings.catch_warnings():
                # Syntax warnings are turned into errors while compiling.
                warnings.simplefilter('error', SyntaxWarning)
                compiled = self._compile(source, symbol="exec")
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            # Invalid syntax can surface as any of these. Such input counts as
            # a 'ready' block, so that it can still be sent on for evaluation
            # with possible ipython special-syntax conversion.
            self._is_complete = True
            self._is_invalid = True
        else:
            # No exception, though the compiler may have returned None.
            self.code = compiled
            self._is_complete = compiled is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant for line-oriented frontends, which have to guess
        whether a block is complete based solely on prior and current input
        lines. No more input is accepted when either:

        * The input is invalid (a syntax error marks the block as complete)
          and its last line is blank, or it is flush left

        * The code is complete, flush left, and consists of a single line or
          a single non-compound statement

        * The code is complete and has a blank line at the end

        A syntax error is never raised from here, as typically clients will
        want to send invalid syntax to an execution backend which might
        convert it into valid Python via one of the dynamic IPython
        mechanisms.
        """
        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        source_lines = self.source.splitlines()

        # The user can make any (complete) input execute by leaving a blank line
        final_line = source_lines[-1]
        if not final_line or final_line.isspace():
            return False

        # If there's just a single line or AST node, and we're flush left, as
        # is the case after a simple statement such as 'a=1', we want to
        # execute it straight away.
        if self.indent_spaces == 0:
            if len(source_lines) <= 1:
                return False

            try:
                tree = ast.parse(u''.join(self._buffer))
            except Exception:
                return False

            # A node without a 'body' attribute is not a compound statement.
            if len(tree.body) == 1 and not hasattr(tree.body[0], 'body'):
                return False

        # General fallback - accept more code
        return True

    def _update_indent(self):
        # self.source always has a trailing newline
        self.indent_spaces = find_next_indent(self.source[:-1])
        self._full_dedent = (self.indent_spaces == 0)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        A newline is appended when the input is not newline-terminated. The
        text goes into ``buffer`` (``self._buffer`` by default), and the
        joined buffer is assigned to the attribute named by ``store``."""
        target = self._buffer if buffer is None else buffer
        target.append(lines if lines.endswith('\n') else lines + '\n')
        setattr(self, store, self._set_source(target))

    def _set_source(self, buffer):
        # Concatenate the buffered chunks into a single string.
        return u''.join(buffer)
515 516
516 517
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Set up the three groups of transformers.

        Each ``*_transforms`` argument replaces the default list for its
        group when it is not None. ``line_input_checker`` is passed to the
        default cell magic transformer as ``end_on_blank_line``.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is None:
            physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]
        self.physical_line_transforms = physical_line_transforms

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is None:
            logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]
        self.logical_line_transforms = logical_line_transforms

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is None:
            # We don't use any of these at present
            python_line_transforms = []
        self.python_line_transforms = python_line_transforms

    @property
    def transforms(self):
        "Quick access to all transformers."
        physical_stage = self.physical_line_transforms + [self.assemble_logical_lines]
        logical_stage = self.logical_line_transforms + [self.assemble_python_lines]
        return physical_stage + logical_stage + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        if self.within_python_line:
            logical_stage = []
        else:
            logical_stage = [self.assemble_logical_lines] + self.logical_line_transforms
        return (self.physical_line_transforms[:] + logical_stage
                + [self.assemble_python_lines] + self.python_line_transforms)

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        del self._buffer_raw[:]
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for transformer in self.transforms:
            try:
                transformer.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Reset the transformers in use and store whatever they give back."""
        def _drain(transform, pending):
            """Yield transformed lines: always strings, never None.

            transform: the current transform
            pending: an iterable of previously transformed inputs.
                     Each may be multiline, which will be passed
                     one line at a time to transform.
            """
            for chunk in pending:
                # push one line at a time
                for line in chunk.splitlines():
                    produced = transform.push(line)
                    if produced is not None:
                        yield produced

            # reset the transform
            leftover = transform.reset()
            if leftover is not None:
                yield leftover

        # Chain the generators so that the output of each transformer feeds
        # the next one; nothing runs until the chain is consumed below.
        stream = []
        for transformer in self.transforms_in_use:
            stream = _drain(transformer, stream)

        flushed = list(stream)
        if flushed:
            self._store('\n'.join(flushed))

    def raw_reset(self):
        """Return raw input only and perform a full reset."""
        raw = self.source_raw
        self.reset()
        return raw

    def source_reset(self):
        """Flush the transformers, return the source and perform a full reset."""
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return whether more input can be pushed into the current block.

        While a transformer is holding back input the answer is always True;
        otherwise the decision is left to the parent class.
        """
        if self.transformer_accumulating:
            return True
        return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input."""
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        The raw lines are stored, then each line is passed through
        :meth:`push_line`, which applies the transformers before handing the
        result to the parent class.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            The value returned by :meth:`push_line` for the last line: False
            while a transformer is accumulating input, otherwise the result
            of the parent class' ``push``.
        """
        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)
        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines() or ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run one line through the transformers and push the result.

        Returns False as soon as a transformer returns None, i.e. it is
        holding the line back; otherwise returns the result of the parent
        class' ``push`` on the transformed line.
        """
        def _hold():
            # A transformer kept the line for later.
            self.transformer_accumulating = True
            return False

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _hold()

        # Logical line handling is skipped inside a multiline Python statement.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _hold()

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _hold()

        line = self.assemble_python_lines.push(line)
        self.within_python_line = line is None
        if line is None:
            return _hold()

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _hold()

        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
@@ -1,184 +1,193 b''
1 1
2 2 ===========================
3 3 Custom input transformation
4 4 ===========================
5 5
6 6 IPython extends Python syntax to allow things like magic commands, and help with
7 7 the ``?`` syntax. There are several ways to customise how the user's input is
8 8 processed into Python code to be executed.
9 9
10 10 These hooks are mainly for other projects using IPython as the core of their
11 11 interactive interface. Using them carelessly can easily break IPython!
12 12
13 13 String based transformations
14 14 ============================
15 15
16 16 .. currentmodule:: IPython.core.inputtransformer
17 17
18 18 When the user enters a line of code, it is first processed as a string. By the
19 19 end of this stage, it must be valid Python syntax.
20 20
21 21 These transformers all subclass :class:`IPython.core.inputtransformer.InputTransformer`,
22 22 and are used by :class:`IPython.core.inputsplitter.IPythonInputSplitter`.
23 23
24 24 These transformers act in three groups, stored separately as lists of instances
25 25 in attributes of :class:`~IPython.core.inputsplitter.IPythonInputSplitter`:
26 26
27 27 * ``physical_line_transforms`` act on the lines as the user enters them. For
28 28 example, these strip Python prompts from examples pasted in.
29 29 * ``logical_line_transforms`` act on lines as connected by explicit line
30 30 continuations, i.e. ``\`` at the end of physical lines. They are skipped
31 31 inside multiline Python statements. This is the point where IPython recognises
32 32 ``%magic`` commands, for instance.
33 33 * ``python_line_transforms`` act on blocks containing complete Python statements.
34 34 Multi-line strings, lists and function calls are reassembled before being
35 35 passed to these, but note that function and class *definitions* are still a
36 36 series of separate statements. IPython does not use any of these by default.
37 37
38 38 An InteractiveShell instance actually has two
39 39 :class:`~IPython.core.inputsplitter.IPythonInputSplitter` instances, as the
40 40 attributes :attr:`~IPython.core.interactiveshell.InteractiveShell.input_splitter`,
41 41 to tell when a block of input is complete, and
42 42 :attr:`~IPython.core.interactiveshell.InteractiveShell.input_transformer_manager`,
43 43 to transform complete cells. If you add a transformer, you should make sure that
44 44 it gets added to both, e.g.::
45 45
46 46 ip.input_splitter.logical_line_transforms.append(my_transformer())
47 47 ip.input_transformer_manager.logical_line_transforms.append(my_transformer())
48 48
49 49 These transformers may raise :exc:`SyntaxError` if the input code is invalid, but
50 50 in most cases it is clearer to pass unrecognised code through unmodified and let
51 51 Python's own parser decide whether it is valid.
52 52
53 53 .. versionchanged:: 2.0
54 54
55 55 Added the option to raise :exc:`SyntaxError`.
56 56
57 57 Stateless transformations
58 58 -------------------------
59 59
60 60 The simplest kind of transformations work one line at a time. Write a function
61 61 which takes a line and returns a line, and decorate it with
62 62 :meth:`StatelessInputTransformer.wrap`::
63 63
64 64 @StatelessInputTransformer.wrap
65 65 def my_special_commands(line):
66 66 if line.startswith("¬"):
67 67 return "specialcommand(" + repr(line) + ")"
68 68 return line
69 69
70 70 The decorator returns a factory function which will produce instances of
71 71 :class:`~IPython.core.inputtransformer.StatelessInputTransformer` using your
72 72 function.
73 73
74 74 Transforming a full block
75 75 -------------------------
76 76
77 .. warning::
78
79 Transforming a full block at once will break the automatic detection of
80 whether a block of code is complete in interfaces relying on this
81 functionality, such as terminal IPython. You will need to use a
82 shortcut to force-execute your cells.
83
77 84 Transforming a full block of Python code is possible by subclassing
78 85 :class:`~IPython.core.inputtransformer.InputTransformer` and overriding the
79 86 ``push`` and ``reset`` methods. The ``reset`` method should return the full block of
80 87 transformed text. As an example, here is a transformer that reverses the lines from last
81 88 to first::
82 89
83 90 from IPython.core.inputtransformer import InputTransformer
84 91
85 92 class ReverseLineTransformer(InputTransformer):
86 93
87 94 def __init__(self):
88 95 self.acc = []
89 96
90 97 def push(self, line):
91 98 self.acc.append(line)
92 99 return None
93 100
94 101 def reset(self):
95 102 ret = '\n'.join(self.acc[::-1])
96 103 self.acc = []
97 104 return ret
98 105
99 106
100 107 Coroutine transformers
101 108 ----------------------
102 109
103 110 More advanced transformers can be written as coroutines. The coroutine will be
104 111 sent each line in turn, followed by ``None`` to reset it. It can yield lines, or
105 112 ``None`` if it is accumulating text to yield at a later point. When reset, it
106 113 should give up any code it has accumulated.
107 114
108 115 You may use :meth:`CoroutineInputTransformer.wrap` to simplify the creation of
109 116 such a transformer.
110 117
111 Here is a simple :class:`CoroutineInputTransformer` that can be though of be
118 Here is a simple :class:`CoroutineInputTransformer` that can be thought of as
112 119 being the identity::
113 120
121 from IPython.core.inputtransformer import CoroutineInputTransformer
122
114 123 @CoroutineInputTransformer.wrap
115 124 def noop():
116 125 line = ''
117 126 while True:
118 127 line = (yield line)
119 128
120 129 ip = get_ipython()
121 130
122 131 ip.input_splitter.logical_line_transforms.append(noop())
123 132 ip.input_transformer_manager.logical_line_transforms.append(noop())
124 133
125 134 This code in IPython strips a constant amount of leading indentation from each
126 135 line in a cell::
127 136
128 137 from IPython.core.inputtransformer import CoroutineInputTransformer
129 138
130 139 @CoroutineInputTransformer.wrap
131 140 def leading_indent():
132 141 """Remove leading indentation.
133 142
134 143 If the first line starts with spaces or tabs, the same whitespace will be
135 144 removed from each following line until it is reset.
136 145 """
137 146 space_re = re.compile(r'^[ \t]+')
138 147 line = ''
139 148 while True:
140 149 line = (yield line)
141 150
142 151 if line is None:
143 152 continue
144 153
145 154 m = space_re.match(line)
146 155 if m:
147 156 space = m.group(0)
148 157 while line is not None:
149 158 if line.startswith(space):
150 159 line = line[len(space):]
151 160 line = (yield line)
152 161 else:
153 162 # No leading spaces - wait for reset
154 163 while line is not None:
155 164 line = (yield line)
156 165
157 166
158 167 Token-based transformers
159 168 ------------------------
160 169
161 170 There is an experimental framework that takes care of tokenizing and
162 171 untokenizing lines of code. Define a function that accepts a list of tokens, and
163 172 returns an iterable of output tokens, and decorate it with
164 173 :meth:`TokenInputTransformer.wrap`. These should only be used in
165 174 ``python_line_transforms``.
166 175
167 176 AST transformations
168 177 ===================
169 178
170 179 After the code has been parsed as Python syntax, you can use Python's powerful
171 180 *Abstract Syntax Tree* tools to modify it. Subclass :class:`ast.NodeTransformer`,
172 181 and add an instance to ``shell.ast_transformers``.
173 182
174 183 This example wraps integer literals in an ``Integer`` class, which is useful for
175 184 mathematical frameworks that want to handle e.g. ``1/3`` as a precise fraction::
176 185
177 186
178 187 class IntegerWrapper(ast.NodeTransformer):
179 188 """Wraps all integers in a call to Integer()"""
180 189 def visit_Num(self, node):
181 190 if isinstance(node.n, int):
182 191 return ast.Call(func=ast.Name(id='Integer', ctx=ast.Load()),
183 192 args=[node], keywords=[])
184 193 return node
General Comments 0
You need to be logged in to leave comments. Login now