##// END OF EJS Templates
only reset the transform once...
MinRK -
Show More
@@ -1,646 +1,638 b''
1 1 """Input handling and transformation machinery.
2 2
3 3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 4 input from a line-oriented frontend is complete and should be executed, and when
5 5 the user should be prompted for another line of code instead. The name 'input
6 6 splitter' is largely for historical reasons.
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 12 and stores the results.
13 13
14 14 For more details, see the class docstrings below.
15 15 """
16 16
17 17 # Copyright (c) IPython Development Team.
18 18 # Distributed under the terms of the Modified BSD License.
19 19 import ast
20 20 import codeop
21 21 import re
22 22 import sys
23 23
24 24 from IPython.utils.py3compat import cast_unicode
25 25 from IPython.core.inputtransformer import (leading_indent,
26 26 classic_prompt,
27 27 ipy_prompt,
28 28 strip_encoding_cookie,
29 29 cellmagic,
30 30 assemble_logical_lines,
31 31 help_end,
32 32 escaped_commands,
33 33 assign_from_magic,
34 34 assign_from_system,
35 35 assemble_python_lines,
36 36 )
37 37
38 38 # These are available in this module for backwards compatibility.
39 39 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
40 40 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
41 41 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Utilities
45 45 #-----------------------------------------------------------------------------
46 46
47 47 # FIXME: These are general-purpose utilities that later can be moved to the
48 48 # general ward. Kept here for now because we're being very strict about test
49 49 # coverage with this code, and this lets us ensure that we keep 100% coverage
50 50 # while developing.
51 51
# Compiled regexps for autoindent management.
#
# ``dedent_re`` recognises indented statements after which the following line
# is expected to be dedented (raise / return / pass / break / continue).
# Every alternative requires at least one leading whitespace character.
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$',        # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',   # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',       # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',             # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',            # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',         # continue (optionally followed by trailing spaces)
]))

# Leading run of blanks on a line (newlines deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string so that ``\s`` and ``\#`` reach
# the regex engine without relying on Python passing unknown string escapes
# through (which emits a warning on recent interpreters).
comment_line_re = re.compile(r'^\s*\#')
67 67
68 68
def num_ini_spaces(s):
    """Count the blank characters at the very start of a string.

    Every leading space, tab, carriage return, form feed or vertical tab
    counts as one; in particular a tab is counted as a single space.  Mixing
    tabs and spaces in the user's input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading run of blanks (0 when there is none).
    """
    # Same character set as the leading-whitespace regexp: newlines are
    # intentionally not part of it.
    stripped = s.lstrip(' \t\r\f\v')
    return len(s) - len(stripped)
89 89
def last_blank(src):
    """Tell whether the final line of the input source is blank.

    A blank is either an empty line or a line consisting only of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise True when the last line produced by
        ``src.splitlines()`` is empty or whitespace-only.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    # Stripping leaves nothing behind exactly when the line is empty or
    # made up of whitespace only.
    return not final_line.strip()
103 103
104 104
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether either of the two module
        regexps matches the probe string built below.
    """
    if not src:
        return False
    # Tricky on purpose: rather than one regexp over the whole source, only
    # the last two lines are kept and '###\n' is prepended as a stand-in for
    # whatever came before them.  That fixed shape is then tested against two
    # regexps.  Anyone changing this should validate the whole test suite.
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
129 129
130 130
def remove_comments(src):
    """Strip every Python comment from the input source.

    Note: comments are NOT recognized inside of strings, so a ``#`` within a
    string literal is treated as the start of a comment too.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with everything from each ``#`` to the end of its line removed.
    """
    comment = re.compile('#.*')
    return comment.sub('', src)
147 147
148 148
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when ``sys.stdin`` has no ``encoding`` attribute or
    when that attribute is None."""
    # Some environments replace sys.stdin with an object whose encoding is
    # missing or None; always hand back a usable value.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
159 159
160 160 #-----------------------------------------------------------------------------
161 161 # Classes and functions for normal Python syntax handling
162 162 #-----------------------------------------------------------------------------
163 163
class InputSplitter(object):
    r"""An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + input(prompt)  # raw_input() on Python 2
            isp.push(line)
        print('Input source was:\n' + isp.source_reset())
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so any other reference to the list sees the reset.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.  False is returned, without compiling,
          when the source ends with a backslash line continuation.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        # Split once; both the blank-line test and the single-line test
        # below need the list of lines.
        source_lines = self.source.splitlines()

        # The user can make any (complete) input execute by leaving a blank line
        last_line = source_lines[-1]
        if (not last_line) or last_line.isspace():
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.indent_spaces == 0:
            if len(source_lines) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                # Source that cannot be parsed is handed off for execution
                # rather than prompting for more input.
                return False
            else:
                # A lone node without a 'body' attribute is a simple
                # (non-compound) statement.
                if len(code_ast.body) == 1 and \
                        not hasattr(code_ast.body[0], 'body'):
                    return False

        # General fallback - accept more code
        return True

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
          if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # endswith() rather than indexing [-1]: an empty or whitespace-only
        # line must not raise IndexError here.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from each meaningful line in `lines`.

        Comments are stripped first; lines that are then empty or
        whitespace-only are ignored.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List to append to; defaults to ``self._buffer``.
        store : str, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Join the buffered chunks into a single unicode string."""
        return u''.join(buffer)
409 409
410 410
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Create the splitter and its three stages of line transformers.

        Parameters
        ----------
        line_input_checker : bool
          Passed to the default ``cellmagic`` transformer as
          ``end_on_blank_line``.
        physical_line_transforms : list, optional
          Transformers applied to each physical line; a default list is
          built when None.
        logical_line_transforms : list, optional
          Transformers applied after logical lines are assembled; a default
          list is built when None.
        python_line_transforms : list, optional
          Transformers applied after Python lines are assembled; empty by
          default.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                strip_encoding_cookie(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for t in self.transforms:
            try:
                t.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Drain input still held by the transformers into the source.

        Each transformer in :attr:`transforms_in_use` is fed the output of
        the previous one and then reset exactly once; whatever comes out of
        the end of the chain is stored via ``_store``.
        """
        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                 Each may be multiline, which will be passed
                 one line at a time to transform.
            """
            for out in outs:
                for line in out.splitlines():
                    # push one line at a time
                    tmp = transform.push(line)
                    if tmp is not None:
                        yield tmp

            # reset the transform once, after all upstream output has been
            # pushed through it; a non-None result is passed downstream
            tmp = transform.reset()
            if tmp is not None:
                yield tmp

        out = []
        for t in self.transforms_in_use:
            # Generators are chained lazily; nothing runs until list() below.
            out = _flush(t, out)

        out = list(out)
        if out:
            self._store('\n'.join(out))

    def raw_reset(self):
        """Return raw input only and perform a full reset.
        """
        out = self.source_raw
        self.reset()
        return out

    def source_reset(self):
        """Flush the transformers, return the source and perform a full reset.

        The reset happens even if flushing raises.
        """
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return True while a transformer is accumulating; otherwise defer
        to :meth:`InputSplitter.push_accepts_more`."""
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.

        The splitter is reset before and after; the transformed source is
        returned.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.  The value is the result of pushing the
          last line of `lines`.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        # lines_list is never empty here, so `out` is always bound.
        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run one physical line through the transformer chain.

        Returns False as soon as any transformer holds the line back
        (returns None from ``push``); otherwise the fully transformed line is
        handed to :meth:`InputSplitter.push` and its result is returned.
        """
        buf = self._buffer

        def _accumulating(dbg):
            #print(dbg)
            self.transformer_accumulating = True
            return False

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        # Logical-line handling is skipped while a multi-line Python
        # statement is still being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        #print("transformers clear") #debug
        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
General Comments 0
You need to be logged in to leave comments. Login now