##// END OF EJS Templates
Expand a bit the documentation about transformers....
Matthias Bussonnier -
Show More
@@ -1,743 +1,741 b''
1 1 """Input handling and transformation machinery.
2 2
3 3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 4 input from a line-oriented frontend is complete and should be executed, and when
5 5 the user should be prompted for another line of code instead. The name 'input
6 6 splitter' is largely for historical reasons.
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 12 and stores the results.
13 13
14 14 For more details, see the class docstrings below.
15 15 """
16 16
17 17 # Copyright (c) IPython Development Team.
18 18 # Distributed under the terms of the Modified BSD License.
19 19 import ast
20 20 import codeop
21 21 import io
22 22 import re
23 23 import sys
24 24 import tokenize
25 25 import warnings
26 26
27 from IPython.utils.py3compat import cast_unicode
28 27 from IPython.core.inputtransformer import (leading_indent,
29 28 classic_prompt,
30 29 ipy_prompt,
31 30 cellmagic,
32 31 assemble_logical_lines,
33 32 help_end,
34 33 escaped_commands,
35 34 assign_from_magic,
36 35 assign_from_system,
37 36 assemble_python_lines,
38 37 )
39 38
40 39 # These are available in this module for backwards compatibility.
41 40 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
42 41 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
43 42 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
44 43
45 44 #-----------------------------------------------------------------------------
46 45 # Utilities
47 46 #-----------------------------------------------------------------------------
48 47
49 48 # FIXME: These are general-purpose utilities that later can be moved to the
50 49 # general ward. Kept here for now because we're being very strict about test
51 50 # coverage with this code, and this lets us ensure that we keep 100% coverage
52 51 # while developing.
53 52
# Compiled regexps for autoindent management.
# dedent_re matches an *indented* line holding a statement that normally ends
# a block; every alternative requires at least one leading whitespace char.
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$',  # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',  # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',  # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',  # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',  # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',  # continue (optionally followed by trailing spaces)
]))
# Leading run of whitespace characters (newline deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
# Raw string: '\s' and '\#' are not valid *string* escapes and only worked by
# accident in a plain literal. The compiled pattern is unchanged.
comment_line_re = re.compile(r'^\s*\#')
69 68
70 69
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run matched by ``ini_spaces_re``,
        or 0 when the string does not start with whitespace.
    """
    match = ini_spaces_re.match(s)
    return match.end() if match else 0
91 90
# Fake token types for partial_tokens:
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1


# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token standing for an unterminated multi-line string."""
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s = s
        # TokenInfo exposes its text as ``string``; provide it too so code
        # that treats every token uniformly does not hit AttributeError.
        self.string = s
        self.start = start
        self.end = end
        self.line = line


class InMultilineStatement:
    """Pseudo-token standing for an unfinished multi-line statement."""
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # Zero-width token: no text, start and end share one position.
        self.s = self.string = ''
        self.start = self.end = pos
        self.line = line
112 111
def partial_tokens(s):
    """Iterate over tokens from a possibly-incomplete string of code.

    This adds two special token types: INCOMPLETE_STRING and
    IN_MULTILINE_STATEMENT. These can only occur as the last token yielded, and
    represent the two main ways for code to be incomplete.

    Any ``tokenize.TokenError`` whose message mentions neither
    'multi-line string' nor 'multi-line statement' is re-raised.
    """
    readline = io.StringIO(s).readline
    # Placeholder so ``token.end`` is defined even if tokenizing fails before
    # the first real token is produced.
    token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError as e:
        # catch EOF error
        lines = s.splitlines(keepends=True)
        end = len(lines), len(lines[-1])
        message = e.args[0]
        if 'multi-line string' in message:
            # The unterminated string is everything after the last good token.
            row, col = start = token.end
            remainder = lines[row - 1][col:] + ''.join(lines[row:])
            yield IncompleteString(remainder, start, end, lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(end, lines[-1])
        else:
            raise
137 136
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : string
        The (possibly incomplete) source entered so far.

    Returns
    -------
    n : int
        Suggested indentation, in spaces, for the next line.
    """
    tokens = list(partial_tokens(code))
    if tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()
    if not tokens:
        return 0
    # Drop trailing tokens that carry no information about the last statement.
    trailing = {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}
    while tokens and tokens[-1].type in trailing:
        tokens.pop()
    if not tokens:
        # Defensive: only trailing tokens were present, so there is nothing
        # to indent relative to (previously this case raised IndexError).
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        # Record only changes of level, not repeats of the current one.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # After a non-logical newline, the column of the next token is
            # taken as that line's indentation.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        openers = {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}
        if len(tokens) >= 2 and tokens[-2].exact_type in openers:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
197 196
198 197
def last_blank(src):
    """Determine if the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        True if the last line of ``src`` is empty or whitespace-only; False
        otherwise, including when ``src`` itself is empty.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return final_line == '' or final_line.isspace()
212 211
213 212
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Determine if the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        True if either helper regexp matches the normalised tail of ``src``;
        False otherwise, including when ``src`` is empty.
    """
    if not src:
        return False
    # The logic here is tricky: I couldn't get a regexp to work and pass all
    # the tests, so I took a different approach: split the source by lines,
    # grab the last two and prepend '###\n' as a stand-in for whatever was in
    # the body before the last two lines.  Then, with that structure, it's
    # possible to analyze with two regexps.  Not the most elegant solution, but
    # it works.  If anyone tries to change this logic, make sure to validate
    # the whole test suite first!
    tail = src.splitlines()[-2:]
    new_src = '\n'.join(['###\n'] + tail)
    return (bool(last_two_blanks_re.match(new_src)) or
            bool(last_two_blanks_re2.match(new_src)))
238 237
239 238
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!  Everything from a
    ``#`` to the end of its line is dropped, even when the ``#`` sits inside
    a string literal.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # '.' does not match newlines, so line structure is preserved.
    return re.sub(r'#.*', '', src)
256 255
257 256
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # There are strange environments for which sys.stdin.encoding is None, or
    # where the attribute is missing altogether.  We ensure that a valid
    # encoding is returned.
    encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if encoding is None else encoding
268 267
269 268 #-----------------------------------------------------------------------------
270 269 # Classes and functions for normal Python syntax handling
271 270 #-----------------------------------------------------------------------------
272 271
class InputSplitter(object):
    r"""An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset())
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None
    # Boolean indicating whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so other references to the list see the reset too.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._is_invalid = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def check_complete(self, source):
        """Return whether a block of code is ready to execute, or should be continued

        This is a non-stateful API, and will reset the state of this InputSplitter.

        Parameters
        ----------
        source : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            self.push(source)
        except SyntaxError:
            # Transformers in IPythonInputSplitter can raise SyntaxError,
            # which push() will not catch.
            return 'invalid', None
        else:
            if self._is_invalid:
                return 'invalid', None
            elif self.push_accepts_more():
                return 'incomplete', self.indent_spaces
            else:
                return 'complete', None
        finally:
            # Runs after the return value is computed, so indent_spaces above
            # is read before the state is wiped.
            self.reset()

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None
        self._is_invalid = False

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        self._update_indent()
        try:
            with warnings.catch_warnings():
                # Promote SyntaxWarning to an exception so it is handled below.
                warnings.simplefilter('error', SyntaxWarning)
                self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            self._is_complete = True
            self._is_invalid = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        source_lines = self.source.splitlines()

        # The user can make any (complete) input execute by leaving a blank line
        last_line = source_lines[-1]
        if (not last_line) or last_line.isspace():
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.indent_spaces == 0:
            if len(source_lines) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                # Unparseable input is handed on rather than held back.
                return False
            else:
                # A single top-level node without a 'body' attribute is a
                # simple (non-compound) statement.
                if len(code_ast.body) == 1 and \
                        not hasattr(code_ast.body[0], 'body'):
                    return False

        # General fallback - accept more code
        return True

    def _update_indent(self):
        """Recompute ``indent_spaces`` and ``_full_dedent`` from the source."""
        # self.source always has a trailing newline
        self.indent_spaces = find_next_indent(self.source[:-1])
        self._full_dedent = (self.indent_spaces == 0)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        ``buffer`` defaults to ``self._buffer``; ``store`` names the attribute
        that receives the joined buffer contents."""

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines + '\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the concatenation of all chunks in ``buffer``."""
        return u''.join(buffer)
516 515
517 516
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Create the splitter and its three groups of transformers.

        Each ``*_transforms`` argument, when not None, replaces the default
        list for that group.  ``line_input_checker`` is forwarded to the
        default ``cellmagic`` transformer as ``end_on_blank_line``.
        """
        super().__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super().reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for t in self.transforms:
            try:
                t.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Drain every transformer in use and store whatever they emit."""
        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                  Each may be multiline, which will be passed
                  one line at a time to transform.
            """
            for out in outs:
                for line in out.splitlines():
                    # push one line at a time
                    tmp = transform.push(line)
                    if tmp is not None:
                        yield tmp

            # reset the transform
            tmp = transform.reset()
            if tmp is not None:
                yield tmp

        # Chain the generators so each transformer consumes the flushed
        # output of the one before it.
        out = []
        for t in self.transforms_in_use:
            out = _flush(t, out)

        out = list(out)
        if out:
            self._store('\n'.join(out))

    def raw_reset(self):
        """Return raw input only and perform a full reset.
        """
        out = self.source_raw
        self.reset()
        return out

    def source_reset(self):
        """Flush the transformers, return the source, and fully reset."""
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return True while a transformer is accumulating; otherwise defer
        to :meth:`InputSplitter.push_accepts_more`."""
        if self.transformer_accumulating:
            return True
        else:
            return super().push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        # lines_list is never empty, so ``out`` is always bound; the status
        # of the last physical line is the status of the whole push.
        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run one physical line through the transformer pipeline.

        Returns False as soon as any transformer holds the line back
        (returns None); otherwise returns the result of
        :meth:`InputSplitter.push` on the fully transformed line.
        """
        def _accumulating(dbg):
            # ``dbg`` only identifies the accumulating stage for debugging.
            self.transformer_accumulating = True
            return False

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        # Logical-line handling is skipped while a multi-line Python
        # statement is being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        self.transformer_accumulating = False
        return super().push(line)
@@ -1,139 +1,184 b''
1 1
2 2 ===========================
3 3 Custom input transformation
4 4 ===========================
5 5
6 6 IPython extends Python syntax to allow things like magic commands, and help with
7 7 the ``?`` syntax. There are several ways to customise how the user's input is
8 8 processed into Python code to be executed.
9 9
10 10 These hooks are mainly for other projects using IPython as the core of their
11 11 interactive interface. Using them carelessly can easily break IPython!
12 12
13 13 String based transformations
14 14 ============================
15 15
16 16 .. currentmodule:: IPython.core.inputtransformer
17 17
18 18 When the user enters a line of code, it is first processed as a string. By the
19 19 end of this stage, it must be valid Python syntax.
20 20
21 21 These transformers all subclass :class:`IPython.core.inputtransformer.InputTransformer`,
22 22 and are used by :class:`IPython.core.inputsplitter.IPythonInputSplitter`.
23 23
24 24 These transformers act in three groups, stored separately as lists of instances
25 25 in attributes of :class:`~IPython.core.inputsplitter.IPythonInputSplitter`:
26 26
27 27 * ``physical_line_transforms`` act on the lines as the user enters them. For
28 28 example, these strip Python prompts from examples pasted in.
29 29 * ``logical_line_transforms`` act on lines as connected by explicit line
30 30 continuations, i.e. ``\`` at the end of physical lines. They are skipped
31 31 inside multiline Python statements. This is the point where IPython recognises
32 32 ``%magic`` commands, for instance.
33 33 * ``python_line_transforms`` act on blocks containing complete Python statements.
34 34 Multi-line strings, lists and function calls are reassembled before being
35 35 passed to these, but note that function and class *definitions* are still a
36 36 series of separate statements. IPython does not use any of these by default.
37 37
38 38 An InteractiveShell instance actually has two
39 39 :class:`~IPython.core.inputsplitter.IPythonInputSplitter` instances, as the
40 40 attributes :attr:`~IPython.core.interactiveshell.InteractiveShell.input_splitter`,
41 41 to tell when a block of input is complete, and
42 42 :attr:`~IPython.core.interactiveshell.InteractiveShell.input_transformer_manager`,
43 43 to transform complete cells. If you add a transformer, you should make sure that
44 44 it gets added to both, e.g.::
45 45
46 46 ip.input_splitter.logical_line_transforms.append(my_transformer())
47 47 ip.input_transformer_manager.logical_line_transforms.append(my_transformer())
48 48
49 49 These transformers may raise :exc:`SyntaxError` if the input code is invalid, but
50 50 in most cases it is clearer to pass unrecognised code through unmodified and let
51 51 Python's own parser decide whether it is valid.
52 52
53 53 .. versionchanged:: 2.0
54 54
55 55 Added the option to raise :exc:`SyntaxError`.
56 56
57 57 Stateless transformations
58 58 -------------------------
59 59
60 60 The simplest kind of transformations work one line at a time. Write a function
61 61 which takes a line and returns a line, and decorate it with
62 62 :meth:`StatelessInputTransformer.wrap`::
63 63
64 64 @StatelessInputTransformer.wrap
65 65 def my_special_commands(line):
66 66 if line.startswith("¬"):
67 67 return "specialcommand(" + repr(line) + ")"
68 68 return line
69 69
70 70 The decorator returns a factory function which will produce instances of
71 71 :class:`~IPython.core.inputtransformer.StatelessInputTransformer` using your
72 72 function.
73 73
74 Transforming a full block
75 -------------------------
76
77 Transforming a full block of Python code is possible by implementing a
78 :class:`~IPython.core.inputtransformer.InputTransformer` and overriding the
79 ``push`` and ``reset`` methods. The ``reset`` method should return the full block of
80 transformed text. As an example, here is a transformer that reverses the lines, from
81 last to first::
82
83 from IPython.core.inputtransformer import InputTransformer
84
85 class ReverseLineTransformer(InputTransformer):
86
87 def __init__(self):
88 self.acc = []
89
90 def push(self, line):
91 self.acc.append(line)
92 return None
93
94 def reset(self):
95 ret = '\n'.join(self.acc[::-1])
96 self.acc = []
97 return ret
98
99
74 100 Coroutine transformers
75 101 ----------------------
76 102
77 103 More advanced transformers can be written as coroutines. The coroutine will be
78 104 sent each line in turn, followed by ``None`` to reset it. It can yield lines, or
79 105 ``None`` if it is accumulating text to yield at a later point. When reset, it
80 106 should give up any code it has accumulated.
81 107
108 You may use :meth:`CoroutineInputTransformer.wrap` to simplify the creation of
109 such a transformer.
110
111 Here is a simple :class:`CoroutineInputTransformer` that can be thought of as
112 the identity::
113
114 @CoroutineInputTransformer.wrap
115 def noop():
116 line = ''
117 while True:
118 line = (yield line)
119
120 ip = get_ipython()
121
122 ip.input_splitter.logical_line_transforms.append(noop())
123 ip.input_transformer_manager.logical_line_transforms.append(noop())
124
82 125 This code in IPython strips a constant amount of leading indentation from each
83 126 line in a cell::
84 127
128 from IPython.core.inputtransformer import CoroutineInputTransformer
129
85 130 @CoroutineInputTransformer.wrap
86 131 def leading_indent():
87 132 """Remove leading indentation.
88 133
89 134 If the first line starts with a spaces or tabs, the same whitespace will be
90 135 removed from each following line until it is reset.
91 136 """
92 137 space_re = re.compile(r'^[ \t]+')
93 138 line = ''
94 139 while True:
95 140 line = (yield line)
96 141
97 142 if line is None:
98 143 continue
99 144
100 145 m = space_re.match(line)
101 146 if m:
102 147 space = m.group(0)
103 148 while line is not None:
104 149 if line.startswith(space):
105 150 line = line[len(space):]
106 151 line = (yield line)
107 152 else:
108 153 # No leading spaces - wait for reset
109 154 while line is not None:
110 155 line = (yield line)
111 156
112 157
113 158 Token-based transformers
114 159 ------------------------
115 160
116 161 There is an experimental framework that takes care of tokenizing and
117 162 untokenizing lines of code. Define a function that accepts a list of tokens, and
118 163 returns an iterable of output tokens, and decorate it with
119 164 :meth:`TokenInputTransformer.wrap`. These should only be used in
120 165 ``python_line_transforms``.
121 166
122 167 AST transformations
123 168 ===================
124 169
125 170 After the code has been parsed as Python syntax, you can use Python's powerful
126 171 *Abstract Syntax Tree* tools to modify it. Subclass :class:`ast.NodeTransformer`,
127 172 and add an instance to ``shell.ast_transformers``.
128 173
129 174 This example wraps integer literals in an ``Integer`` class, which is useful for
130 175 mathematical frameworks that want to handle e.g. ``1/3`` as a precise fraction::
131 176
132 177
133 178 class IntegerWrapper(ast.NodeTransformer):
134 179 """Wraps all integers in a call to Integer()"""
135 180 def visit_Num(self, node):
136 181 if isinstance(node.n, int):
137 182 return ast.Call(func=ast.Name(id='Integer', ctx=ast.Load()),
138 183 args=[node], keywords=[])
139 184 return node
General Comments 0
You need to be logged in to leave comments. Login now