##// END OF EJS Templates
Fix test failure in IPython.lib
Thomas Kluyver -
Show More
@@ -1,661 +1,658 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Imports
66 66 #-----------------------------------------------------------------------------
67 67 # stdlib
68 68 import ast
69 69 import codeop
70 70 import re
71 71 import sys
72 72
73 73 # IPython modules
74 74 from IPython.core.splitinput import split_user_input, LineInfo
75 75 from IPython.utils.py3compat import cast_unicode
76 76 from IPython.core.inputtransformer import (leading_indent,
77 77 classic_prompt,
78 78 ipy_prompt,
79 79 cellmagic,
80 80 assemble_logical_lines,
81 81 help_end,
82 82 escaped_commands,
83 83 assign_from_magic,
84 84 assign_from_system,
85 85 assemble_python_lines,
86 86 )
87 87
88 88 # Temporary!
89 89 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
90 90 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
91 91 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
92 92
93 93 #-----------------------------------------------------------------------------
94 94 # Utilities
95 95 #-----------------------------------------------------------------------------
96 96
# FIXME: These are general-purpose utilities that can later be moved to a
# general utilities module.  They are kept here for now because we're being
# very strict about test coverage with this code, and this lets us ensure that
# we keep 100% coverage while developing.
101 101
# Compiled regexps for autoindent management.
#
# dedent_re matches an *indented* line holding a statement that ends the
# current block (raise/return/pass/break/continue), i.e. a line after which
# the suggested indentation should go back one level.
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$', # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$', # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$', # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$', # wacky return with immediate open paren
    r'^\s+pass\s*$', # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$', # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$', # continue (optionally followed by trailing spaces)
]))

# Leading run of whitespace characters (newline deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string: '\s' and '\#' are not valid
# string escapes, so the non-raw spelling triggers an invalid-escape warning
# on recent Python versions.  The compiled pattern is unchanged.
comment_line_re = re.compile(r'^\s*\#')
117 117
118 118
def num_ini_spaces(s):
    """Count the whitespace characters that start a string.

    Each leading character matched by ``ini_spaces_re`` counts as one, so a
    tab is counted as a single space.  Mixing tabs and spaces in the user's
    input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    # The pattern is anchored at the start, so the end offset of the match is
    # exactly the number of leading whitespace characters.
    return leading.end() if leading else 0
139 139
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A blank is either an empty line or a line consisting only of whitespace.
    Note that a single trailing newline merely terminates the last line; it
    does not create a blank one.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      True if the final line is empty or whitespace-only, False otherwise
      (including for empty input).
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
153 153
154 154
# Patterns applied (with ``match``) to the synthetic string built in
# last_two_blanks(); they are not meant to be used on raw source directly.
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)

def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      True if the source ends in two blanks, False otherwise (including for
      empty input).
    """
    if not src:
        return False
    # A single regexp on the raw source could not be made to pass the whole
    # test suite, so the check is done on a normalized string instead: keep
    # only the last two lines and prepend '###\n' as a stand-in for whatever
    # body came before them.  That fixed shape can then be analyzed with the
    # two patterns above.  Not elegant, but it works -- validate against the
    # full test suite before changing this logic!
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    return any(pattern.match(probe)
               for pattern in (last_two_blanks_re, last_two_blanks_re2))
179 179
180 180
def remove_comments(src):
    """Strip every Python comment from the input source.

    Note: comments are NOT recognized inside of strings!  Everything from a
    '#' to the end of its line is dropped, even when the '#' sits inside a
    string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # '.' does not match a newline, so line structure is left untouched.
    comment_pattern = '#.*'
    return re.sub(comment_pattern, '', src)
197 197
198 198
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # In some unusual environments sys.stdin has no ``encoding`` attribute or
    # it is None; fall back to 'ascii' so callers always get a usable value.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
209 209
210 210 #-----------------------------------------------------------------------------
211 211 # Classes and functions for normal Python syntax handling
212 212 #-----------------------------------------------------------------------------
213 213
class InputSplitter(object):
    """An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so that any external reference to the buffer list
        # stays valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly: an explicit line continuation
        # always means more input is expected.
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        # The user can make any (complete) input execute by leaving a blank line
        last_line = self.source.splitlines()[-1]
        if (not last_line) or last_line.isspace():
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.indent_spaces == 0:
            if len(self.source.splitlines()) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                # Source that cannot be parsed is handed over as-is.
                return False
            else:
                # Compound statements (if/for/def/...) carry a 'body'
                # attribute; anything else is a simple statement.
                if len(code_ast.body) == 1 and \
                        not hasattr(code_ast.body[0], 'body'):
                    return False

        # General fallback - accept more code
        return True

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
          if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # endswith() rather than indexing [-1]: an empty or whitespace-only
        # line must not raise IndexError, it simply leaves the level alone.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from each significant line in `lines`.

        Comments are stripped first; lines that are then empty or
        whitespace-only are skipped.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List receiving the text; defaults to ``self._buffer``.
        store : str, optional
          Name of the attribute that is set to the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the contents of `buffer` joined into a single string."""
        return u''.join(buffer)
462 459
463 460
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=False, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Set up the splitter and its three groups of line transformers.

        Each ``*_transforms`` argument replaces the corresponding default
        list when it is not None.  ``line_input_checker`` is forwarded to the
        default ``cellmagic`` transformer as ``end_on_blank_line``.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is None:
            physical_line_transforms = [leading_indent(),
                                        classic_prompt(),
                                        ipy_prompt(),
                                        ]
        self.physical_line_transforms = physical_line_transforms

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is None:
            logical_line_transforms = [
                cellmagic(end_on_blank_line=line_input_checker),
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]
        self.logical_line_transforms = logical_line_transforms

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is None:
            # We don't use any of these at present
            python_line_transforms = []
        self.python_line_transforms = python_line_transforms

    @property
    def transforms(self):
        "Quick access to all transformers."
        physical_stage = self.physical_line_transforms + \
            [self.assemble_logical_lines]
        logical_stage = physical_stage + self.logical_line_transforms
        return logical_stage + [self.assemble_python_lines] + \
            self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        # Work on a copy so the stored list is never modified.
        active = self.physical_line_transforms[:]
        if not self.within_python_line:
            active += [self.assemble_logical_lines] + self.logical_line_transforms
        return active + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False
        for transformer in self.transforms:
            transformer.reset()

    def flush_transformers(self):
        """Reset the transformers in use, storing any output they give back.

        Output returned by one transformer is pushed through the next one in
        the chain; whatever comes out of the last one is stored as source.
        """
        def _drain(transform, pending):
            # Nothing from upstream: just collect whatever reset() hands back.
            if pending is None:
                return transform.reset() or None
            pushed = transform.push(pending)
            return pushed or transform.reset() or None

        carried = None
        for transformer in self.transforms_in_use:
            carried = _drain(transformer, carried)

        if carried is not None:
            self._store(carried)

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.
        """
        self.flush_transformers()
        transformed, raw = self.source, self.source_raw
        self.reset()
        return transformed, raw

    def source_reset(self):
        """Flush the transformers, then return the source and fully reset."""
        self.flush_transformers()
        return super(IPythonInputSplitter, self).source_reset()

    def push_accepts_more(self):
        """Return whether more input can be pushed into the current block.

        Always True while a transformer is still accumulating input;
        otherwise the decision is delegated to the base class.
        """
        if self.transformer_accumulating:
            return True
        return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines() or ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        # The status of the last pushed line is the status of the whole push.
        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run a single line through the transformer chain.

        Returns False as soon as any transformer withholds the line (it
        returned None from ``push``); otherwise the fully transformed line is
        pushed to the base class and its completeness status is returned.
        """
        def _hold(dbg):
            # `dbg` only identifies the stage, as a debugging aid.
            self.transformer_accumulating = True
            return False

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _hold(transformer)

        # Logical-line handling is skipped while a multi-line Python
        # statement is still being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _hold('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _hold(transformer)

        line = self.assemble_python_lines.push(line)
        self.within_python_line = line is None
        if line is None:
            return _hold('acc python line')

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _hold(transformer)

        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
General Comments 0
You need to be logged in to leave comments. Login now