##// END OF EJS Templates
New ast_nodes method, and change push_accepts_more to use it.
Thomas Kluyver -
Show More
@@ -1,1009 +1,1030 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63 from __future__ import print_function
64 64
65 65 #-----------------------------------------------------------------------------
66 66 # Imports
67 67 #-----------------------------------------------------------------------------
68 68 # stdlib
69 69 import ast
70 70 import codeop
71 71 import re
72 72 import sys
73 73
74 74 # IPython modules
75 75 from IPython.utils.text import make_quoted_expr
76 76
77 77 #-----------------------------------------------------------------------------
78 78 # Globals
79 79 #-----------------------------------------------------------------------------
80 80
81 81 # The escape sequences that define the syntax transformations IPython will
82 82 # apply to user input. These can NOT be just changed here: many regular
83 83 # expressions and other parts of the code may use their hardcoded values, and
84 84 # for all intents and purposes they constitute the 'IPython syntax', so they
85 85 # should be considered fixed.
86 86
# Escape prefixes recognised at the start of an input line.  Each one selects
# a different syntax transformation.

# Send line to underlying system shell
ESC_SHELL = '!'
# Send line to system shell and capture output
ESC_SH_CAP = '!!'
# Find information about object
ESC_HELP = '?'
# Find extra-detailed information about object
ESC_HELP2 = '??'
# Call magic function
ESC_MAGIC = '%'
# Split args on whitespace, quote each as string and call
ESC_QUOTE = ','
# Quote all args as a single string, call
ESC_QUOTE2 = ';'
# Call first argument with rest of line as arguments
ESC_PAREN = '/'
95 95
96 96 #-----------------------------------------------------------------------------
97 97 # Utilities
98 98 #-----------------------------------------------------------------------------
99 99
100 100 # FIXME: These are general-purpose utilities that later can be moved to the
101 101 # general ward. Kept here for now because we're being very strict about test
102 102 # coverage with this code, and this lets us ensure that we keep 100% coverage
103 103 # while developing.
104 104
# Compiled regexps for autoindent management.
#
# dedent_re matches an *indented* line whose first statement is one of the
# keywords that end a block (raise / return / pass).  The trailing \b makes
# sure we only match the keyword itself and not identifiers that merely start
# with it (e.g. 'password = 1' or 'returned = x').
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
# Leading run of horizontal whitespace (newlines deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
comment_line_re = re.compile(r'^\s*\#')
112 112
113 113
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    Every leading space, tab, carriage return, form feed or vertical tab
    counts as one; in particular a tab is counted as a single space.  Mixing
    tabs and spaces in the user's input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
    """
    # Same character set as the module's ini_spaces_re pattern: newlines are
    # intentionally not considered indentation.
    without_indent = s.lstrip(' \t\r\f\v')
    return len(s) - len(without_indent)
134 134
135 135
def remove_comments(src):
    """Strip Python comments from a piece of source text.

    Everything from a '#' up to the end of its line is dropped.

    Note: comments are NOT recognized inside of strings, so a '#' inside a
    string literal is treated as the start of a comment too.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    without_comments = re.sub('#.*', '', src)
    return without_comments
152 152
153 153
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when sys.stdin has no ``encoding`` attribute or when
    that attribute is None (there are strange environments where this
    happens), so a usable encoding name is always returned.
    """
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
164 164
165 165 #-----------------------------------------------------------------------------
166 166 # Classes and functions for normal Python syntax handling
167 167 #-----------------------------------------------------------------------------
168 168
169 169 # HACK! This implementation, written by Robert K a while ago using the
170 170 # compiler module, is more robust than the other one below, but it expects its
171 171 # input to be pure python (no ipython syntax). For now we're using it as a
172 172 # second-pass splitter after the first pass transforms the input to pure
173 173 # python.
174 174
def split_blocks(python):
    """Split multiple lines of code into discrete commands that can be
    executed singly.

    Parameters
    ----------
    python : str
        Pure, exec'able Python code.

    Returns
    -------
    commands : list of str
        Separate commands that can be exec'ed independently.  Each command is
        newline-terminated.  If the input cannot be parsed, a one-element list
        holding the original, unmodified input is returned.
    """
    # Leading and trailing whitespace is stripped before parsing, since the
    # parser and exec are not consistent in how they treat it.  Keep the
    # original around in case we bail out on error.
    python_ori = python
    python = python.strip()

    # Parse the code into an abstract syntax tree.  Any parsing failure
    # (SyntaxError, ValueError, ...) means we can't split, so hand back the
    # input as a single block.  KeyboardInterrupt/SystemExit are deliberately
    # NOT caught here.
    try:
        code_ast = ast.parse(python)
    except Exception:
        return [python_ori]

    # Uncomment to help debug the ast tree
    # for n in code_ast.body:
    #     print(n.lineno, '->', n)

    # Each separate command is available by iterating over the module body.
    # The lineno attribute is the line number (1-indexed) beginning the
    # command's suite; nodes without a usable lineno are skipped.
    linenos = []
    for node in code_ast.body:
        lineno = getattr(node, 'lineno', None)
        if lineno is None:
            continue
        # On newer Pythons the lineno of a decorated def/class is the line of
        # the 'def'/'class' keyword, not of its first decorator.  The block
        # must start at the first decorator, otherwise the decorators would be
        # glued to the end of the previous command.
        for decorator in getattr(node, 'decorator_list', None) or ():
            lineno = min(lineno, decorator.lineno)
        linenos.append(lineno - 1)

    # When we finally get the slices, we will need to slice all the way to
    # the end even though we don't have a line number for it. Fortunately,
    # None does the job nicely.
    linenos.append(None)

    # Same problem at the other end: sometimes the ast tree has its
    # first complete statement not starting on line 0. In this case
    # we might miss part of it.  This fixes ticket 266993.  Thanks Gael!
    linenos[0] = 0

    lines = python.splitlines()

    # Create a list of atomic commands.  Empty slices (several statements
    # sharing one line) are dropped.
    cmds = []
    for start, stop in zip(linenos[:-1], linenos[1:]):
        cmd = lines[start:stop]
        if cmd:
            cmds.append('\n'.join(cmd) + '\n')

    return cmds
239 239
240 240
class InputSplitter(object):
    """An object that can split Python source input in executable blocks.

    This object is designed to be used in one of two basic modes:

    1. By feeding it python source line-by-line, using :meth:`push`. In this
       mode, it will return on each push whether the currently pushed code
       could be executed already. In addition, it provides a method called
       :meth:`push_accepts_more` that can be used to query whether more input
       can be pushed into a single interactive block.

    2. By calling :meth:`split_blocks` with a single, multiline Python string,
       that is then split into blocks each of which can be executed
       interactively as a single statement.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print('Input source was:')
        print(isp.source_reset())
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attributes callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'line'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of ['line', 'cell']; default is 'line'.

       The input_mode parameter controls how new inputs are used when fed via
       the :meth:`push` method:

       - 'line': meant for line-oriented clients, inputs are appended one at a
         time to the internal buffer and the whole buffer is compiled.

       - 'cell': meant for clients that can edit multi-line 'cells' of text at
          a time.  A cell can contain one or more blocks that can be compiled
          in 'single' mode by Python.  In this mode, each new input
          completely replaces all prior inputs.  Cell mode is thus equivalent
          to prepending a full reset() to every push() call.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so any outside reference to the buffer stays valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (_is_complete), so it
        can be queried at any time.
        """
        if self.input_mode == 'cell':
            self.reset()

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly: an explicit line continuation
        # always means more input is needed.
        if source.rstrip().endswith('\\'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        if not self._is_complete:
            return True

        # If we already have complete input and we're flush left, the answer
        # depends.  In line mode, if there hasn't been any indentation,
        # that's it.  If we've come back from some indentation, we need
        # the blank final line to finish.
        # In cell mode, we need to check how many blocks the input so far
        # compiles into, because if there's already more than one full
        # independent block of input, then the client has entered full
        # 'cell' mode and is feeding lines that each is complete.  In this
        # case we should then keep accepting.  The Qt terminal-like console
        # does precisely this, to provide the convenience of terminal-like
        # input of single expressions, but allowing the user (with a
        # separate keystroke) to switch to 'cell' mode and type multiple
        # expressions in one shot.
        if self.indent_spaces==0:
            if self.input_mode=='line':
                if not self._full_dedent:
                    return False
            else:
                try:
                    nodes = self.ast_nodes()
                except Exception:
                    # Unparseable input is reported as 'done' so the client
                    # can forward it to the execution backend.
                    return False
                else:
                    if len(nodes) == 1:
                        return False

        # When input is complete, then termination is marked by an extra blank
        # line at the end.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks.

        Note: this method starts by performing a full reset().

        Parameters
        ----------
        lines : str
          A possibly multiline string.

        Returns
        -------
        blocks : list
          A list of strings, each possibly multiline.  Each string corresponds
          to a single block that can be compiled in 'single' mode (unless it
          has a syntax error)."""

        # This code is fairly delicate.  If you make any changes here, make
        # absolutely sure that you do run the full test suite and ALL tests
        # pass.

        self.reset()
        blocks = []

        # Reversed copy so we can use pop() efficiently and consume the input
        # as a stack
        lines = lines.splitlines()[::-1]
        # Outer loop over all input
        while lines:
            # Inner loop to build each block
            while True:
                # Safety exit from inner loop
                if not lines:
                    break
                # Grab next line but don't push it yet
                next_line = lines.pop()
                # Blank/empty lines are pushed as-is
                if not next_line or next_line.isspace():
                    self.push(next_line)
                    continue

                # Check indentation changes caused by the *next* line
                indent_spaces, _full_dedent = self._find_indent(next_line)

                # If the next line causes a dedent, it can be for two different
                # reasons: either an explicit de-dent by the user or a
                # return/raise/pass statement.  These MUST be handled
                # separately:
                #
                # 1. the first case is only detected when the actual explicit
                # dedent happens, and that would be the *first* line of a *new*
                # block.  Thus, we must put the line back into the input buffer
                # so that it starts a new block on the next pass.
                #
                # 2. the second case is detected in the line before the actual
                # dedent happens, so we consume the line and we can break out
                # to start a new block.

                # Case 1, explicit dedent causes a break.
                # Note: check that we weren't on the very last line, else we'll
                # enter an infinite loop adding/removing the last line.
                if _full_dedent and lines and not next_line.startswith(' '):
                    lines.append(next_line)
                    break

                # Otherwise any line is pushed
                self.push(next_line)

                # Case 2, full dedent with full block ready:
                if _full_dedent or \
                       self.indent_spaces==0 and not self.push_accepts_more():
                    break
            # Form the new block with the current source input
            blocks.append(self.source_reset())

        #return blocks
        # HACK!!! Now that our input is in blocks but guaranteed to be pure
        # python syntax, feed it back a second time through the AST-based
        # splitter (the module-level split_blocks function), which is more
        # accurate than ours.
        return split_blocks(''.join(blocks))

    def ast_nodes(self, lines=None):
        """Turn the lines into a list of AST nodes.

        Parameters
        ----------
        lines : str
          A (possibly multiline) string of Python code. If None (default), it
          will use the InputSplitter's current code buffer.

        Returns
        -------
        A list of AST (abstract syntax tree) nodes representing the code.

        Raises
        ------
        Whatever :func:`ast.parse` raises for code it cannot parse (e.g.
        SyntaxError); no exception is swallowed here.
        """
        if lines is None:
            lines = u"".join(self._buffer)
        return ast.parse(lines).body

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
        if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # Ignore trailing whitespace when looking for the block-opening colon:
        # callers strip comments first, which typically leaves blanks behind
        # ('if x:  # note' -> 'if x:  ').  endswith() is also safe on an empty
        # string, unlike indexing with [-1].
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from every non-blank line in `lines`.

        Comments are removed first, so comment-only lines are ignored.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : str
          Text to append.
        buffer : list, optional
          List to append to; defaults to the instance's own line buffer.
        store : str, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the buffer's lines joined into one unicode string."""
        return u''.join(buffer)
606 627
607 628
608 629 #-----------------------------------------------------------------------------
609 630 # Functions and classes for IPython-specific syntactic support
610 631 #-----------------------------------------------------------------------------
611 632
612 633 # RegExp for splitting line contents into pre-char//first word-method//rest.
613 634 # For clarity, each group in on one line.
614 635
# Verbose pattern with four groups: leading whitespace, escape character(s),
# function part, rest of line.  A raw string is used so the regexp escapes
# (\s, \?, \w, ...) are not treated as (invalid) string escape sequences.
line_split = re.compile(r"""
             ^(\s*)              # any leading space
             ([,;/%]|!!?|\?\??)  # escape character or characters
             \s*(%?[\w\.\*]*)    # function/method, possibly with leading %
                                 # to correctly treat things like '?%magic'
             (\s+.*$|$)          # rest of line
             """, re.VERBOSE)
622 643
623 644
def split_user_input(line):
    """Split user input into early whitespace, esc-char, function part and rest.

    Lines containing '=' are currently handled in a very inconsistent
    manner.

    Examples
    ========
    >>> split_user_input('x=1')
    ('', '', 'x=1', '')
    >>> split_user_input('?')
    ('', '?', '', '')
    >>> split_user_input('??')
    ('', '??', '', '')
    >>> split_user_input(' ?')
    (' ', '?', '', '')
    >>> split_user_input(' ??')
    (' ', '??', '', '')
    >>> split_user_input('??x')
    ('', '??', 'x', '')
    >>> split_user_input('?x=1')
    ('', '', '?x=1', '')
    >>> split_user_input('!ls')
    ('', '!', 'ls', '')
    >>> split_user_input(' !ls')
    (' ', '!', 'ls', '')
    >>> split_user_input('!!ls')
    ('', '!!', 'ls', '')
    >>> split_user_input(' !!ls')
    (' ', '!!', 'ls', '')
    >>> split_user_input(',ls')
    ('', ',', 'ls', '')
    >>> split_user_input(';ls')
    ('', ';', 'ls', '')
    >>> split_user_input(' ;ls')
    (' ', ';', 'ls', '')
    >>> split_user_input('f.g(x)')
    ('', '', 'f.g(x)', '')
    >>> split_user_input('f.g (x)')
    ('', '', 'f.g', '(x)')
    >>> split_user_input('?%hist')
    ('', '?', '%hist', '')
    >>> split_user_input('?x*')
    ('', '?', 'x*', '')
    """
    found = line_split.match(line)
    if found is not None:
        lspace, esc, fpart, rest = found.groups()
    else:
        # No escape pattern: there is no esc part, and the line is cut at its
        # first run of whitespace.
        esc = ''
        lspace = re.match('^(\s*)(.*)', line).group(1)
        pieces = line.split(None, 1)
        if len(pieces) == 2:
            fpart, rest = pieces
        else:
            # Zero or one word: the whole line is the function part.
            fpart, rest = line, ''

    # fpart has to be a valid python identifier, so it better be only pure
    # ascii, no unicode:
    try:
        fpart = fpart.encode('ascii')
    except UnicodeEncodeError:
        # Not pure ascii: push it back into `rest` and leave fpart empty.
        lspace = unicode(lspace)
        rest = fpart + u' ' + rest
        fpart = u''

    return lspace, esc, fpart.strip(), rest.lstrip()
694 715
695 716
696 717 # The escaped translators ALL receive a line where their own escape has been
697 718 # stripped. Only '?' is valid at the end of the line, all others can only be
698 719 # placed at the start.
699 720
class LineInfo(object):
    """A single line of input and associated info.

    Thin convenience wrapper around the result of :func:`split_user_input`,
    meant to be passed around during input transformations.

    Instances carry the following attributes:

    line
      The original, raw line

    lspace
      Any early whitespace before actual text starts.

    esc
      The initial esc character (or characters, for double-char escapes like
      '??' or '!!').

    fpart
      The 'function part', which is basically the maximal initial sequence
      of valid python identifiers and the '.' character.  This is what is
      checked for alias and magic transformations, used for auto-calling,
      etc.

    rest
      Everything else on the line.
    """
    def __init__(self, line):
        self.line = line
        pieces = split_user_input(line)
        self.lspace, self.esc, self.fpart, self.rest = pieces

    def __str__(self):
        fields = (self.lspace, self.esc, self.fpart, self.rest)
        return "LineInfo [%s|%s|%s|%s]" % fields
736 757
737 758
738 759 # Transformations of the special syntaxes that don't rely on an explicit escape
739 760 # character but instead on patterns on the input line
740 761
741 762 # The core transformations are implemented as standalone functions that can be
742 763 # tested and validated in isolation. Each of these uses a regexp, we
743 764 # pre-compile these and keep them close to each function definition for clarity
744 765
_assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                               r'\s*=\s*!\s*(?P<cmd>.*)')

def transform_assign_system(line):
    """Handle the `files = !ls` syntax.

    Lines that assign the output of a shell command are rewritten into a
    ``get_ipython().getoutput(...)`` call; any other line is returned as-is.
    """
    found = _assign_system_re.match(line)
    if found is None:
        return line
    quoted_cmd = make_quoted_expr(found.group('cmd'))
    return '%s = get_ipython().getoutput(%s)' % (found.group('lhs'),
                                                 quoted_cmd)
758 779
759 780
_assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                              r'\s*=\s*%\s*(?P<cmd>.*)')

def transform_assign_magic(line):
    """Handle the `a = %who` syntax.

    Lines that assign the result of a magic are rewritten into a
    ``get_ipython().magic(...)`` call; any other line is returned as-is.
    """
    found = _assign_magic_re.match(line)
    if found is None:
        return line
    quoted_cmd = make_quoted_expr(found.group('cmd'))
    return '%s = get_ipython().magic(%s)' % (found.group('lhs'), quoted_cmd)
773 794
774 795
_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. )')

def transform_classic_prompt(line):
    """Handle inputs that start with '>>> ' syntax.

    A leading classic Python prompt ('>>> ' or '... ', optionally preceded by
    spaces/tabs) is stripped.  Empty, blank and prompt-less lines are returned
    unchanged.
    """
    if line and not line.isspace():
        prompt = _classic_prompt_re.match(line)
        if prompt:
            # The pattern is anchored at the start, so the match end is
            # exactly the length of the prompt prefix.
            return line[prompt.end():]
    return line
787 808
788 809
_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )')

def transform_ipy_prompt(line):
    """Handle inputs that start classic IPython prompt syntax.

    A leading 'In [N]: ' prompt, or a '   ...: ' continuation prompt, is
    stripped.  Empty, blank and prompt-less lines are returned unchanged.
    """
    if line and not line.isspace():
        prompt = _ipy_prompt_re.match(line)
        if prompt:
            # The pattern is anchored at the start, so the match end is
            # exactly the length of the prompt prefix.
            return line[prompt.end():]
    return line
803 824
804 825
class EscapedTransformer(object):
    """Class to transform lines that are explicitly escaped out.

    Instances are callable: calling one with a line returns the transformed
    line.  The ``_tr_*`` static methods each take a ``LineInfo``-like object
    and return the rewritten source line for one escape sequence.
    """

    def __init__(self):
        # Dispatch table mapping each escape sequence to its translator.
        self.tr = {ESC_SHELL: self._tr_system,
                   ESC_SH_CAP: self._tr_system2,
                   ESC_HELP: self._tr_help,
                   ESC_HELP2: self._tr_help,
                   ESC_MAGIC: self._tr_magic,
                   ESC_QUOTE: self._tr_quote,
                   ESC_QUOTE2: self._tr_quote2,
                   ESC_PAREN: self._tr_paren}

    # Support for syntax transformations that use explicit escapes typed by
    # the user at the beginning of a line
    @staticmethod
    def _tr_system(line_info):
        "Translate lines escaped with: !"
        cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
        return '%sget_ipython().system(%s)' % (line_info.lspace,
                                                make_quoted_expr(cmd))

    @staticmethod
    def _tr_system2(line_info):
        "Translate lines escaped with: !!"
        # Drop the two-character escape after removing leading whitespace.
        cmd = line_info.line.lstrip()[2:]
        return '%sget_ipython().getoutput(%s)' % (line_info.lspace,
                                                   make_quoted_expr(cmd))

    @staticmethod
    def _tr_help(line_info):
        "Translate lines escaped with: ?/??"
        # A naked help line should just fire the intro help screen
        if not line_info.line[1:]:
            return 'get_ipython().show_usage()'

        # There may be one or two '?' at the end, move them to the front so
        # that the rest of the logic can assume escapes are at the start
        line = line_info.line
        for _ in range(2):
            if line.endswith('?'):
                line = line[-1] + line[:-1]
        line_info = LineInfo(line)

        # From here on, simply choose which level of detail to get, and
        # special-case the psearch syntax
        if '*' in line_info.line:
            pinfo = 'psearch'
        elif line_info.esc == '??':
            pinfo = 'pinfo2'
        else:
            pinfo = 'pinfo'

        tpl = '%sget_ipython().magic(u"%s %s")'
        return tpl % (line_info.lspace, pinfo,
                      ' '.join([line_info.fpart, line_info.rest]).strip())

    @staticmethod
    def _tr_magic(line_info):
        "Translate lines escaped with: %"
        tpl = '%sget_ipython().magic(%s)'
        cmd = make_quoted_expr(' '.join([line_info.fpart,
                                         line_info.rest]).strip())
        return tpl % (line_info.lspace, cmd)

    @staticmethod
    def _tr_quote(line_info):
        "Translate lines escaped with: ,"
        # Every whitespace-separated word becomes its own quoted argument.
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               '", "'.join(line_info.rest.split()))

    @staticmethod
    def _tr_quote2(line_info):
        "Translate lines escaped with: ;"
        # The whole remainder becomes a single quoted argument.
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               line_info.rest)

    @staticmethod
    def _tr_paren(line_info):
        "Translate lines escaped with: /"
        # Every whitespace-separated word becomes an unquoted argument.
        return '%s%s(%s)' % (line_info.lspace, line_info.fpart,
                             ", ".join(line_info.rest.split()))

    def __call__(self, line):
        """Transform a single line that may be explicitly escaped out.

        This calls the above _tr_* static methods for the actual line
        translations.  Empty or whitespace-only lines, and lines whose escape
        is not recognized, are returned unmodified."""

        # Empty lines just get returned unmodified
        if not line or line.isspace():
            return line

        # Get line endpoints, where the escapes can be
        line_info = LineInfo(line)

        # If the escape is not at the start, only '?' needs to be
        # special-cased.  All other escapes are only valid at the start
        if line_info.esc not in self.tr:
            if line.endswith(ESC_HELP):
                return self._tr_help(line_info)
            # If we don't recognize the escape, don't modify the line
            return line

        return self.tr[line_info.esc](line_info)
913 934
914 935
# A function-looking object to be used by the rest of the code: calling
# transform_escaped(line) returns the (possibly rewritten) line.  The purpose
# of the class in this case is to organize related functionality, more than
# to manage state.
transform_escaped = EscapedTransformer()
919 940
920 941
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax.

    In addition to what the parent class stores, the raw (untransformed)
    input is accumulated in ``_buffer_raw`` / ``source_raw``.
    """

    # String with raw, untransformed input.
    source_raw = ''

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None):
        InputSplitter.__init__(self, input_mode)
        self._buffer_raw = []

    def reset(self):
        """Reset the input buffer and associated state."""
        InputSplitter.reset(self)
        # Clear in place so the list object itself is preserved.
        self._buffer_raw[:] = []
        self.source_raw = ''

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.

        Returns a ``(source, source_raw)`` tuple captured before the reset.
        """
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def push(self, lines):
        """Push one or more lines of IPython input.

        Each line is run through the IPython syntax transformers (when it is
        safe to do so, see below) and then handed to the parent class's
        ``push``.  The value returned is whatever the parent's ``push``
        returned for the last line pushed.
        """
        if not lines:
            return super(IPythonInputSplitter, self).push(lines)

        # We must ensure all input is pure unicode.  ``bytes`` is ``str`` on
        # Python 2, so byte strings are decoded there exactly as before, while
        # text strings are never asked to ``.decode()`` themselves.
        if isinstance(lines, bytes):
            lines = lines.decode(self.encoding)

        lines_list = lines.splitlines()

        transforms = (transform_escaped, transform_assign_system,
                      transform_assign_magic, transform_ipy_prompt,
                      transform_classic_prompt)

        # Transform logic
        #
        # We only apply the line transformers to the input if we have either
        # no input yet, or complete input, or if the last line of the buffer
        # ends with ':' (opening an indented block).  This prevents the
        # accidental transformation of escapes inside multiline expressions
        # like triple-quoted strings or parenthesized expressions.
        #
        # The last heuristic, while ugly, ensures that the first line of an
        # indented block is correctly transformed.
        #
        # FIXME: try to find a cleaner approach for this last bit.

        # If we were in 'cell' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        changed_input_mode = self.input_mode == 'cell'
        if changed_input_mode:
            self.reset()
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            push = super(IPythonInputSplitter, self).push
            for line in lines_list:
                if (self._is_complete or not self._buffer
                        or self._buffer[-1].rstrip().endswith(':')):
                    for f in transforms:
                        line = f(line)

                out = push(line)
        finally:
            # Always restore the caller's mode, even if a push raised.
            if changed_input_mode:
                self.input_mode = 'cell'
        return out
General Comments 0
You need to be logged in to leave comments. Login now