##// END OF EJS Templates
Update inputsplitter docstring
Thomas Kluyver -
Show More
@@ -1,661 +1,627 b''
1 """Analysis of text input into executable blocks.
1 """Input handling and transformation machinery.
2 2
3 The main class in this module, :class:`InputSplitter`, is designed to break
4 input from either interactive, line-by-line environments or block-based ones,
5 into standalone blocks that can be executed by Python as 'single' statements
6 (thus triggering sys.displayhook).
3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 input from a line-oriented frontend is complete and should be executed, and when
5 the user should be prompted for another line of code instead. The name 'input
6 splitter' is largely for historical reasons.
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 The code that actually performs the IPython syntax transformations lives in
11 :mod:`IPython.core.inputtransformer`. :class:`IPythonInputSplitter` feeds the
12 raw code to those transformers in order and stores the results.
10 13
11 For more details, see the class docstring below.
12
13 Syntax Transformations
14 ----------------------
15
16 One of the main jobs of the code in this file is to apply all syntax
17 transformations that make up 'the IPython language', i.e. magics, shell
18 escapes, etc. All transformations should be implemented as *fully stateless*
19 entities, that simply take one line as their input and return a line.
20 Internally for implementation purposes they may be a normal function or a
21 callable object, but the only input they receive will be a single line and they
22 should only return a line, without holding any data-dependent state between
23 calls.
24
25 As an example, the EscapedTransformer is a class so we can more clearly group
26 together the functionality of dispatching to individual functions based on the
27 starting escape character, but the only method for public use is its call
28 method.
29
30
31 ToDo
32 ----
33
34 - Should we make push() actually raise an exception once push_accepts_more()
35 returns False?
36
37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 at least just attributes of a class so not really very exposed.
39
40 - Think about the best way to support dynamic things: automagic, autocall,
41 macros, etc.
42
43 - Think of a better heuristic for the application of the transforms in
44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 track indentation change events (indent, dedent, nothing) and apply them only
46 if the indentation went up, but not otherwise.
47
48 - Think of the cleanest way for supporting user-specified transformations (the
49 user prefilters we had before).
14 For more details, see the class docstrings below.
50 15
51 16 Authors
52 17 -------
53 18
54 19 * Fernando Perez
55 20 * Brian Granger
21 * Thomas Kluyver
56 22 """
57 23 #-----------------------------------------------------------------------------
58 24 # Copyright (C) 2010 The IPython Development Team
59 25 #
60 26 # Distributed under the terms of the BSD License. The full license is in
61 27 # the file COPYING, distributed as part of this software.
62 28 #-----------------------------------------------------------------------------
63 29
64 30 #-----------------------------------------------------------------------------
65 31 # Imports
66 32 #-----------------------------------------------------------------------------
67 33 # stdlib
68 34 import ast
69 35 import codeop
70 36 import re
71 37 import sys
72 38
73 39 # IPython modules
74 40 from IPython.utils.py3compat import cast_unicode
75 41 from IPython.core.inputtransformer import (leading_indent,
76 42 classic_prompt,
77 43 ipy_prompt,
78 44 strip_encoding_cookie,
79 45 cellmagic,
80 46 assemble_logical_lines,
81 47 help_end,
82 48 escaped_commands,
83 49 assign_from_magic,
84 50 assign_from_system,
85 51 assemble_python_lines,
86 52 )
87 53
88 54 # These are available in this module for backwards compatibility.
89 55 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
90 56 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
91 57 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
92 58
93 59 #-----------------------------------------------------------------------------
94 60 # Utilities
95 61 #-----------------------------------------------------------------------------
96 62
97 63 # FIXME: These are general-purpose utilities that can later be moved to a
98 64 # general utilities module. They are kept here for now because we're being
99 65 # very strict about test coverage with this code, and this lets us ensure
100 66 # that we keep 100% coverage while developing.
101 67
# Compiled regexps for autoindent management.
#
# dedent_re matches an *indented* line holding a statement that ends the
# current block (raise/return/pass/break/continue), i.e. a line after which
# the next line of input is expected to be dedented.
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$',  # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',  # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',  # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',  # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',  # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',  # continue (optionally followed by trailing spaces)
]))
# Leading run of whitespace at the start of a string (newlines excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  A raw string is used so that '\s' and '\#' reach the
# regex engine untouched instead of being invalid string escape sequences.
comment_line_re = re.compile(r'^\s*\#')
117 83
118 84
def num_ini_spaces(s):
    """Count the whitespace characters that start a string.

    Every leading space, tab, carriage return, form feed or vertical tab
    counts as one, so a tab is treated as a single space.  Mixing tabs and
    spaces in the user's input is *not* supported for now.  A newline is not
    part of the counted set and therefore stops the count.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
    """
    # Same character set as the module's ``ini_spaces_re`` pattern.
    remainder = s.lstrip(' \t\r\f\v')
    return len(s) - len(remainder)
139 105
def last_blank(src):
    """Tell whether the final line of the input source is blank.

    A blank is either an empty line or a line made up only of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input, otherwise whether the last line is blank.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
153 119
154 120
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input, otherwise whether either pattern matches.
    """
    if not src:
        return False
    # The logic here is tricky: a single regexp that passes all the tests
    # could not be found, so a different approach is used.  Only the last
    # two lines of the source are kept, and '###\n' is put in front of them
    # as a stand-in for whatever body preceded them.  That fixed structure
    # can then be analyzed with two regexps.  If anyone changes this logic,
    # validate the whole test suite first!
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    patterns = (last_two_blanks_re, last_two_blanks_re2)
    return any(pattern.match(probe) is not None for pattern in patterns)
179 145
180 146
def remove_comments(src):
    """Strip every Python comment from the input source.

    Note: comments are NOT recognized inside of strings!  Anything from a
    '#' to the end of its line is removed, even when the '#' sits inside a
    string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
197 163
198 164
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # Some environments replace sys.stdin with an object whose ``encoding``
    # is missing or None; fall back so that a usable encoding name is
    # always returned.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
209 175
210 176 #-----------------------------------------------------------------------------
211 177 # Classes and functions for normal Python syntax handling
212 178 #-----------------------------------------------------------------------------
213 179
class InputSplitter(object):
    r"""An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self):
        """Create a new InputSplitter instance."""
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so the same list object stays bound to the instance.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset."""
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note
          that this value is also stored as a private attribute
          (``_is_complete``), so it can be queried at any time.  False is
          returned (and ``_is_complete`` left as None) when the source ends
          with a backslash continuation.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly: an explicit line continuation
        # always means more input is needed.
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more.
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        # The user can make any (complete) input execute by leaving a blank line
        last_line = self.source.splitlines()[-1]
        if (not last_line) or last_line.isspace():
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.indent_spaces == 0:
            if len(self.source.splitlines()) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                # Source that cannot be parsed is handed over for execution.
                return False
            else:
                # A lone node without a ``body`` attribute is a simple
                # (non-compound) statement.
                if len(code_ast.body) == 1 and \
                        not hasattr(code_ast.body[0], 'body'):
                    return False

        # General fallback - accept more code
        return True

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to
          self.indent_spaces if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # endswith() rather than indexing [-1], so that an empty or
        # whitespace-only line cannot raise IndexError.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety: never report a negative indentation level.
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from every non-blank line in `lines`.

        Comments are stripped first, so pure comment lines are ignored.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List to append to; defaults to ``self._buffer``.
        store : string, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Join the buffered chunks into a single unicode string."""
        return u''.join(buffer)
459 425
460 426
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # True while some transformer holds input it has not yet handed back.
    transformer_accumulating = False

    # True while assemble_python_lines holds input it has not yet handed back.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Set up the three transformer stages.

        Each ``*_transforms`` argument, when given, replaces the default list
        of transformers for that stage.  `line_input_checker` is forwarded to
        the default cell magic transformer as ``end_on_blank_line``.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is None:
            physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                strip_encoding_cookie(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]
        self.physical_line_transforms = physical_line_transforms

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is None:
            logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]
        self.logical_line_transforms = logical_line_transforms

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is None:
            # We don't use any of these at present
            python_line_transforms = []
        self.python_line_transforms = python_line_transforms

    @property
    def transforms(self):
        "Quick access to all transformers."
        logical_stage = ([self.assemble_logical_lines]
                         + self.logical_line_transforms)
        python_stage = ([self.assemble_python_lines]
                        + self.python_line_transforms)
        return self.physical_line_transforms + logical_stage + python_stage

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        stages = self.physical_line_transforms[:]
        if not self.within_python_line:
            stages += [self.assemble_logical_lines] + self.logical_line_transforms
        stages = stages + [self.assemble_python_lines]
        return stages + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False
        for transformer in self.transforms:
            transformer.reset()

    def flush_transformers(self):
        """Reset the transformers in use and store whatever text they release.

        Output released by one transformer is pushed into the next one; if
        anything is left after the last transformer, it is stored.
        """
        pending = None
        for transformer in self.transforms_in_use:
            if pending is not None:
                pending = (transformer.push(pending)
                           or transformer.reset() or None)
            else:
                pending = transformer.reset() or None

        if pending is not None:
            self._store(pending)

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset."""
        self.flush_transformers()
        transformed, raw = self.source, self.source_raw
        self.reset()
        return transformed, raw

    def source_reset(self):
        """Flush the transformers, then return the source and fully reset."""
        self.flush_transformers()
        return super(IPythonInputSplitter, self).source_reset()

    def push_accepts_more(self):
        """Return True while a transformer is accumulating; otherwise defer
        to :meth:`InputSplitter.push_accepts_more`."""
        if self.transformer_accumulating:
            return True
        return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input."""
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to
        # transformers
        lines_list = lines.splitlines() or ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        # The status of the last physical line is the status of the push.
        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run one physical line through the transformer stages.

        Returns False as soon as a transformer keeps the line (returns None);
        otherwise the transformed line is pushed to the parent class and its
        result is returned.
        """
        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                self.transformer_accumulating = True
                return False

        # The logical-line stage is skipped while a multi-line Python
        # construct is being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                self.transformer_accumulating = True
                return False

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    self.transformer_accumulating = True
                    return False

        line = self.assemble_python_lines.push(line)
        self.within_python_line = line is None
        if line is None:
            self.transformer_accumulating = True
            return False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                self.transformer_accumulating = True
                return False

        # Every transformer handed its input back.
        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
General Comments 0
You need to be logged in to leave comments. Login now