##// END OF EJS Templates
Fix inputsplitter to pass empty lines to transformers
Thomas Kluyver -
Show More
@@ -1,634 +1,633 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Imports
66 66 #-----------------------------------------------------------------------------
67 67 # stdlib
68 68 import ast
69 69 import codeop
70 70 import re
71 71 import sys
72 72
73 73 # IPython modules
74 74 from IPython.core.splitinput import split_user_input, LineInfo
75 75 from IPython.utils.py3compat import cast_unicode
76 76 from IPython.core.inputtransformer import (leading_indent,
77 77 classic_prompt,
78 78 ipy_prompt,
79 79 cellmagic,
80 80 help_end,
81 81 escaped_transformer,
82 82 assign_from_magic,
83 83 assign_from_system,
84 84 )
85 85
86 86 # Temporary!
87 87 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
88 88 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
89 89 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
90 90
91 91 #-----------------------------------------------------------------------------
92 92 # Utilities
93 93 #-----------------------------------------------------------------------------
94 94
95 95 # FIXME: These are general-purpose utilities that later can be moved to the
96 96 # general ward. Kept here for now because we're being very strict about test
97 97 # coverage with this code, and this lets us ensure that we keep 100% coverage
98 98 # while developing.
99 99
100 100 # compiled regexps for autoindent management
# Each alternative matches an *indented* statement after which the next line
# is expected to be dedented by one level.
_DEDENT_STATEMENT_PATTERNS = (
    r'^\s+raise(\s.*)?$',        # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',   # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',       # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',             # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',            # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',         # continue (optionally followed by trailing spaces)
)
dedent_re = re.compile('|'.join(_DEDENT_STATEMENT_PATTERNS))
# Leading run of horizontal whitespace (newline is deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string: the pattern text is
# unchanged, but '\s' and '\#' are no longer invalid string escapes.
comment_line_re = re.compile(r'^\s*\#')


def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading run of space, tab, carriage return, form feed
        or vertical tab characters; 0 if the string does not start with one.
    """
    ini_spaces = ini_spaces_re.match(s)
    return ini_spaces.end() if ini_spaces else 0
137 137
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A line counts as blank when it is empty or made up only of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input; otherwise whether the final line (as produced by
      ``str.splitlines``) is empty or whitespace-only.
    """
    if src:
        final_line = src.splitlines()[-1]
        return not final_line or final_line.isspace()
    return False
151 151
152 152
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source finishes with two blank lines.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input; otherwise whether either module-level regexp
      matches the probe string built from the last two lines.
    """
    if not src:
        return False
    # A single regexp over the raw source did not pass the test suite, so the
    # check works on a synthetic string instead: the last two lines of the
    # source, preceded by '###\n' standing in for everything that came before.
    # Anyone changing this logic must validate the whole test suite first.
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
177 177
178 178
def remove_comments(src):
    """Strip every ``#``-to-end-of-line span from the input source.

    Note: comments are NOT recognized inside of strings!  A ``#`` inside a
    string literal is treated like any other comment start.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    stripped = comment_pattern.sub('', src)
    return stripped
195 195
196 196
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # Some environments replace sys.stdin with an object that has no
    # ``encoding`` attribute, or one set to None; fall back to a valid name.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
207 207
208 208 #-----------------------------------------------------------------------------
209 209 # Classes and functions for normal Python syntax handling
210 210 #-----------------------------------------------------------------------------
211 211
class InputSplitter(object):
    """An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already.  In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attributes callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'line'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of ['line', 'cell']; default is 'line'.

       The input_mode parameter controls how new inputs are used when fed via
       the :meth:`push` method:

       - 'line': meant for line-oriented clients, inputs are appended one at a
         time to the internal buffer and the whole buffer is compiled.

       - 'cell': meant for clients that can edit multi-line 'cells' of text at
          a time.  A cell can contain one or more blocks that can be compiled
          in 'single' mode by Python.  In this mode, each new input
          completely replaces all prior inputs.  Cell mode is thus equivalent
          to prepending a full reset() to every push() call.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        # Fall back to the class-level default when no mode is given.
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so any external reference to the list stays valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (``_is_complete``), so it
        can be queried at any time.
        """
        if self.input_mode == 'cell':
            self.reset()

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly: a trailing backslash-newline is an
        # explicit line continuation, so the block cannot be complete yet.
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        if not self._is_complete:
            return True

        # If we already have complete input and we're flush left, the answer
        # depends.  In line mode, if there hasn't been any indentation,
        # that's it. If we've come back from some indentation, we need
        # the blank final line to finish.
        # In cell mode, we need to check how many blocks the input so far
        # compiles into, because if there's already more than one full
        # independent block of input, then the client has entered full
        # 'cell' mode and is feeding lines that each is complete.  In this
        # case we should then keep accepting. The Qt terminal-like console
        # does precisely this, to provide the convenience of terminal-like
        # input of single expressions, but allowing the user (with a
        # separate keystroke) to switch to 'cell' mode and type multiple
        # expressions in one shot.
        if self.indent_spaces==0:
            if self.input_mode=='line':
                if not self._full_dedent:
                    return False
            else:
                try:
                    code_ast = ast.parse(u''.join(self._buffer))
                except Exception:
                    # Unparseable input is treated as finished here.
                    return False
                else:
                    if len(code_ast.body) == 1:
                        return False

        # When input is complete, then termination is marked by an extra blank
        # line at the end.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
        if indentation doesn't change.

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # endswith() rather than indexing [-1]: an empty or whitespace-only
        # line must not raise IndexError even if a caller forgets to filter it.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety: never report a negative indentation level.
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update ``indent_spaces`` and ``_full_dedent`` from pushed lines.

        Comments are stripped first; lines that are then empty or
        whitespace-only do not affect the indentation state.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List to append to; defaults to ``self._buffer``.
        store : str, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the buffer entries concatenated into one unicode string."""
        return u''.join(buffer)
490 490
491 491
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None):
        """Create a splitter with the default chain of line transformers.

        Parameters
        ----------
        input_mode : str
          Passed through unchanged to :class:`InputSplitter`.
        """
        super(IPythonInputSplitter, self).__init__(input_mode)
        self._buffer_raw = []
        self._validate = True
        # Applied in this order to every pushed line (see push_line()).
        self.transforms = [leading_indent,
                           classic_prompt,
                           ipy_prompt,
                           cellmagic,
                           help_end,
                           escaped_transformer,
                           assign_from_magic,
                           assign_from_system,
                          ]

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.
        """
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def push_accepts_more(self):
        """Return whether more input can be pushed into the current block.

        Always True while a transformer is still holding back input;
        otherwise defers to :meth:`InputSplitter.push_accepts_more`.
        """
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        An empty string is not short-circuited: it is forwarded to the
        transformers as a single empty line.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (_is_complete), so it
        can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to
        # transformers; this also guarantees the loop below runs at least once
        # so ``out`` is always bound.
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Transform logic
        #
        # We only apply the line transformers to the input if we have either no
        # input yet, or complete input, or if the last line of the buffer ends
        # with ':' (opening an indented block).  This prevents the accidental
        # transformation of escapes inside multiline expressions like
        # triple-quoted strings or parenthesized expressions.
        #
        # The last heuristic, while ugly, ensures that the first line of an
        # indented block is correctly transformed.
        #
        # FIXME: try to find a cleaner approach for this last bit.

        # If we were in 'block' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        changed_input_mode = False

        if self.input_mode == 'cell':
            self.reset()
            changed_input_mode = True
            saved_input_mode = 'cell'
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            for line in lines_list:
                out = self.push_line(line)
        finally:
            # Restore cell mode even if a transformer or the parent push raised.
            if changed_input_mode:
                self.input_mode = saved_input_mode

        return out

    def push_line(self, line):
        """Run one line through the transformers, then push the result.

        Returns False while a transformer is accumulating input (its
        ``push()`` returned None); otherwise returns the result of
        :meth:`InputSplitter.push` on the transformed line.
        """
        buf = self._buffer
        # Heuristic described in push(): transformers apply freely when there
        # is no pending input, the input so far is complete, or the previous
        # buffer entry ends with ':' or ','.
        not_in_string = self._is_complete or not buf or \
                (buf and buf[-1].rstrip().endswith((':', ',')))
        for transformer in self.transforms:
            if not_in_string or transformer.look_in_string:
                line = transformer.push(line)
                if line is None:
                    self.transformer_accumulating = True
                    return False

        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
General Comments 0
You need to be logged in to leave comments. Login now