##// END OF EJS Templates
Fix bug where 'if 1:' was being added to comment-only code....
Fernando Perez -
Show More
@@ -1,954 +1,960 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Imports
66 66 #-----------------------------------------------------------------------------
67 67 # stdlib
68 68 import codeop
69 69 import re
70 70 import sys
71 71
72 72 # IPython modules
73 73 from IPython.utils.text import make_quoted_expr
74 74 #-----------------------------------------------------------------------------
75 75 # Globals
76 76 #-----------------------------------------------------------------------------
77 77
78 78 # The escape sequences that define the syntax transformations IPython will
79 79 # apply to user input. These can NOT be just changed here: many regular
80 80 # expressions and other parts of the code may use their hardcoded values, and
81 81 # for all intents and purposes they constitute the 'IPython syntax', so they
82 82 # should be considered fixed.
83 83
# Each escape is looked up by EscapedTransformer to pick the translator that
# rewrites the line into plain Python.
ESC_SHELL = '!'     # translated to get_ipython().system(...)
ESC_SH_CAP = '!!'   # translated to get_ipython().getoutput(...)
ESC_HELP = '?'      # translated to the 'pinfo' magic
ESC_HELP2 = '??'    # translated to the 'pinfo2' magic
ESC_MAGIC = '%'     # translated to get_ipython().magic(...)
ESC_QUOTE = ','     # call with every whitespace-separated word quoted
ESC_QUOTE2 = ';'    # call with the whole rest of the line as one quoted arg
ESC_PAREN = '/'     # call with the words joined by commas, unquoted
92 92
93 93 #-----------------------------------------------------------------------------
94 94 # Utilities
95 95 #-----------------------------------------------------------------------------
96 96
97 97 # FIXME: These are general-purpose utilities that later can be moved to the
98 98 # general ward. Kept here for now because we're being very strict about test
99 99 # coverage with this code, and this lets us ensure that we keep 100% coverage
100 100 # while developing.
101 101
# compiled regexps for autoindent management

# An indented line that starts with a block-ending statement.  The trailing
# \b stops identifiers that merely begin with one of the keywords (for
# example 'password' or 'returns') from being mistaken for a dedent.
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')

# Leading run of whitespace characters (newline excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string so the backslash escapes
# reach the regexp engine without relying on invalid string escapes.
comment_line_re = re.compile(r'^\s*#')
109
106 110
def num_ini_spaces(s):
    """Count the whitespace characters at the very start of a string.

    Every leading whitespace character counts as one, so a tab is worth a
    single space.  Mixing tabs and spaces in the user's input is *not*
    supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
      Length of the leading whitespace run, 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
127 131
128 132
def remove_comments(src):
    """Strip every Python comment out of the given source text.

    Everything from a '#' up to (but not including) the end of its line is
    dropped.  Note: comments are NOT recognized inside of strings, so a '#'
    inside a string literal is treated as the start of a comment too.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
145 149
146 150
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when sys.stdin has no ``encoding`` attribute or
    when that attribute is None."""
    # Some environments expose a stdin whose encoding is None (or lack the
    # attribute altogether); always hand back a usable encoding name.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
157 161
158 162 #-----------------------------------------------------------------------------
159 163 # Classes and functions for normal Python syntax handling
160 164 #-----------------------------------------------------------------------------
161 165
162 166 # HACK! This implementation, written by Robert K a while ago using the
163 167 # compiler module, is more robust than the other one below, but it expects its
164 168 # input to be pure python (no ipython syntax). For now we're using it as a
165 169 # second-pass splitter after the first pass transforms the input to pure
166 170 # python.
167 171
def split_blocks(python):
    """ Split multiple lines of code into discrete commands that can be
    executed singly.

    The input is parsed with the ``compiler`` module and cut at the first
    line of each top-level node that carries a line number.  If parsing
    raises, the original input is returned unsplit as a one-element list.

    Parameters
    ----------
    python : str
        Pure, exec'able Python code.

    Returns
    -------
    commands : list of str
        Separate commands that can be exec'ed independently.  Each command is
        newline-terminated.
    """

    # NOTE(review): the ``compiler`` module exists only in Python 2.
    import compiler

    # compiler.parse treats trailing spaces after a newline as a
    # SyntaxError.  This is different than codeop.CommandCompiler, which
    # will compile the trailing spaces just fine.  We simply strip any
    # trailing whitespace off.  Passing a string with trailing whitespace
    # to exec will fail however.  There seems to be some inconsistency in
    # how trailing whitespace is handled, but this seems to work.
    python_ori = python # save original in case we bail on error
    python = python.strip()

    # The compiler module does not like unicode, so we need to encode it:
    if isinstance(python, unicode):
        # Use the utf-8-sig BOM so the compiler detects this as a UTF-8
        # encoded string.
        python = '\xef\xbb\xbf' + python.encode('utf-8')

    # The compiler module will parse the code into an abstract syntax tree.
    # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
    # NOTE(review): the bare except below also traps KeyboardInterrupt and
    # SystemExit, not just parse errors.
    try:
        ast = compiler.parse(python)
    except:
        return [python_ori]

    # Uncomment to help debug the ast tree
    # for n in ast.node:
    #     print n.lineno,'->',n

    # Each separate command is available by iterating over ast.node. The
    # lineno attribute is the line number (1-indexed) beginning the commands
    # suite.
    # lines ending with ";" yield a Discard Node that doesn't have a lineno
    # attribute.  These nodes can and should be discarded.  But there are
    # other situations that cause Discard nodes that shouldn't be discarded.
    # We might eventually discover other cases where lineno is None and have
    # to put in a more sophisticated test.
    linenos = [x.lineno-1 for x in ast.node if x.lineno is not None]

    # When we finally get the slices, we will need to slice all the way to
    # the end even though we don't have a line number for it. Fortunately,
    # None does the job nicely.
    linenos.append(None)

    # Same problem at the other end: sometimes the ast tree has its
    # first complete statement not starting on line 0. In this case
    # we might miss part of it.  This fixes ticket 266993.  Thanks Gael!
    linenos[0] = 0

    lines = python.splitlines()

    # Create a list of atomic commands: consecutive start lines delimit one
    # command each; empty slices are skipped.
    cmds = []
    for i, j in zip(linenos[:-1], linenos[1:]):
        cmd = lines[i:j]
        if cmd:
            cmds.append('\n'.join(cmd)+'\n')

    return cmds
242 246
243 247
class InputSplitter(object):
    """An object that can split Python source input in executable blocks.

    This object is designed to be used in one of two basic modes:

    1. By feeding it python source line-by-line, using :meth:`push`. In this
       mode, it will return on each push whether the currently pushed code
       could be executed already. In addition, it provides a method called
       :meth:`push_accepts_more` that can be used to query whether more input
       can be pushed into a single interactive block.

    2. By calling :meth:`split_blocks` with a single, multiline Python string,
       that is then split into blocks each of which can be executed
       interactively as a single statement.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'line'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of ['line', 'block']; default is 'line'.

        The input_mode parameter controls how new inputs are used when fed via
        the :meth:`push` method:

        - 'line': meant for line-oriented clients, inputs are appended one at a
          time to the internal buffer and the whole buffer is compiled.

        - 'block': meant for clients that can edit multi-line blocks of text at
          a time.  Each new input completely replaces all prior inputs.  Block
          mode is thus equivalent to prepending a full reset() to every push()
          call.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (_is_complete), so it
        can be queried at any time.
        """
        if self.input_mode == 'block':
            self.reset()

        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios.
        # Pure comment lines are left alone: wrapping them would produce an
        # 'if 1:' with no body.
        if not self._buffer and lines[:1] in [' ', '\t'] and \
               not comment_line_re.match(lines):
            lines = 'if 1:\n%s' % lines

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        self._update_indent(lines)
        try:
            self.code = self._compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        if not self._is_complete:
            return True

        if self.indent_spaces==0:
            return False

        # Still indented: only a trailing blank/whitespace-only line ends the
        # block.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks.

        Note: this method starts by performing a full reset().

        Parameters
        ----------
        lines : str
          A possibly multiline string.

        Returns
        -------
        blocks : list
          A list of strings, each possibly multiline.  Each string corresponds
          to a single block that can be compiled in 'single' mode (unless it
          has a syntax error)."""

        # This code is fairly delicate.  If you make any changes here, make
        # absolutely sure that you do run the full test suite and ALL tests
        # pass.

        self.reset()
        blocks = []

        # Reversed copy so we can use pop() efficiently and consume the input
        # as a stack
        lines = lines.splitlines()[::-1]
        # Outer loop over all input
        while lines:
            #print 'Current lines:', lines  # dbg
            # Inner loop to build each block
            while True:
                # Safety exit from inner loop
                if not lines:
                    break
                # Grab next line but don't push it yet
                next_line = lines.pop()
                # Blank/empty lines are pushed as-is
                if not next_line or next_line.isspace():
                    self.push(next_line)
                    continue

                # Check indentation changes caused by the *next* line
                indent_spaces, _full_dedent = self._find_indent(next_line)

                # If the next line causes a dedent, it can be for two different
                # reasons: either an explicit de-dent by the user or a
                # return/raise/pass statement.  These MUST be handled
                # separately:
                #
                # 1. the first case is only detected when the actual explicit
                # dedent happens, and that would be the *first* line of a *new*
                # block.  Thus, we must put the line back into the input buffer
                # so that it starts a new block on the next pass.
                #
                # 2. the second case is detected in the line before the actual
                # dedent happens, so we consume the line and we can break out
                # to start a new block.

                # Case 1, explicit dedent causes a break.
                # Note: check that we weren't on the very last line, else we'll
                # enter an infinite loop adding/removing the last line.
                if _full_dedent and lines and not next_line.startswith(' '):
                    lines.append(next_line)
                    break

                # Otherwise any line is pushed
                self.push(next_line)

                # Case 2, full dedent with full block ready:
                if _full_dedent or \
                       self.indent_spaces==0 and not self.push_accepts_more():
                    break
            # Form the new block with the current source input
            blocks.append(self.source_reset())

        #return blocks
        # HACK!!! Now that our input is in blocks but guaranteed to be pure
        # python syntax, feed it back a second time through the AST-based
        # splitter, which is more accurate than ours.
        return split_blocks(''.join(blocks))

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
        if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                #print 'Full dedent in text',self.source # dbg
                full_dedent = True

        # Ignore trailing whitespace when looking for the block-opening colon:
        # _update_indent() hands us lines whose trailing comment was stripped,
        # so 'if x:  # note' arrives here as 'if x:  '.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0
            #print 'safety' # dbg

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from each non-blank line of `lines`.

        Comments are stripped first, then every remaining non-empty,
        non-whitespace line is run through :meth:`_find_indent`.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if lines.endswith('\n'):
            self._buffer.append(lines)
        else:
            self._buffer.append(lines+'\n')
        self._set_source()

    def _set_source(self):
        """Rebuild `source` from the buffer, encoded with `self.encoding`."""
        self.source = ''.join(self._buffer).encode(self.encoding)
586 592
587 593
588 594 #-----------------------------------------------------------------------------
589 595 # Functions and classes for IPython-specific syntactic support
590 596 #-----------------------------------------------------------------------------
591 597
# RegExp for splitting line contents into pre-char//first word-method//rest.
# For clarity, each group is on one line.  The pattern is a raw string so the
# backslash escapes are passed to the regexp engine untouched.

line_split = re.compile(r"""
             ^(\s*)              # any leading space
             ([,;/%]|!!?|\?\??)  # escape character or characters
             \s*(%?[\w\.]*)      # function/method, possibly with leading %
                                 # to correctly treat things like '?%magic'
             (\s+.*$|$)          # rest of line
             """, re.VERBOSE)
602 608
603 609
def split_user_input(line):
    """Split user input into early whitespace, esc-char, function part and rest.

    The line is first matched against ``line_split``.  When that fails (no
    recognized escape at the start), the line is split on its first run of
    whitespace instead and the escape is reported as the empty string.

    This currently handles lines with '=' in them in a very inconsistent
    manner.

    Examples
    ========
    >>> split_user_input('x=1')
    ('', '', 'x=1', '')
    >>> split_user_input('?')
    ('', '?', '', '')
    >>> split_user_input('??')
    ('', '??', '', '')
    >>> split_user_input(' ?')
    (' ', '?', '', '')
    >>> split_user_input(' ??')
    (' ', '??', '', '')
    >>> split_user_input('??x')
    ('', '??', 'x', '')
    >>> split_user_input('?x=1')
    ('', '', '?x=1', '')
    >>> split_user_input('!ls')
    ('', '!', 'ls', '')
    >>> split_user_input(' !ls')
    (' ', '!', 'ls', '')
    >>> split_user_input('!!ls')
    ('', '!!', 'ls', '')
    >>> split_user_input(' !!ls')
    (' ', '!!', 'ls', '')
    >>> split_user_input(',ls')
    ('', ',', 'ls', '')
    >>> split_user_input(';ls')
    ('', ';', 'ls', '')
    >>> split_user_input(' ;ls')
    (' ', ';', 'ls', '')
    >>> split_user_input('f.g(x)')
    ('', '', 'f.g(x)', '')
    >>> split_user_input('f.g (x)')
    ('', '', 'f.g', '(x)')
    >>> split_user_input('?%hist')
    ('', '?', '%hist', '')
    """
    match = line_split.match(line)
    if match:
        lspace, esc, fpart, rest = match.groups()
    else:
        # No leading escape: fall back to a plain whitespace split.
        try:
            fpart, rest = line.split(None, 1)
        except ValueError:
            # Fewer than two whitespace-separated fields
            fpart, rest = line, ''
        # Raw string keeps '\s' a regexp escape rather than an (invalid)
        # string escape.
        lspace = re.match(r'^(\s*)(.*)', line).group(1)
        esc = ''

    # fpart has to be a valid python identifier, so it better be only pure
    # ascii, no unicode:
    try:
        fpart = fpart.encode('ascii')
    except UnicodeEncodeError:
        # Not an identifier: move it into `rest` and report no function part.
        lspace = unicode(lspace)
        rest = fpart + u' ' + rest
        fpart = u''

    return lspace, esc, fpart.strip(), rest.lstrip()
672 678
673 679
674 680 # The escaped translators ALL receive a line where their own escape has been
675 681 # stripped. Only '?' is valid at the end of the line, all others can only be
676 682 # placed at the start.
677 683
class LineInfo(object):
    """One input line together with the pieces it splits into.

    A thin convenience wrapper around :func:`split_user_input`, meant to be
    handed around between the input transformations.

    Attributes set on construction:

    line
      The original, raw line

    lspace
      Any early whitespace before actual text starts.

    esc
      The initial esc character (or characters, for double-char escapes like
      '??' or '!!').

    fpart
      The 'function part', which is basically the maximal initial sequence
      of valid python identifiers and the '.' character.  This is what is
      checked for alias and magic transformations, used for auto-calling,
      etc.

    rest
      Everything else on the line.
    """
    def __init__(self, line):
        self.line = line
        pieces = split_user_input(line)
        self.lspace, self.esc, self.fpart, self.rest = pieces

    def __str__(self):
        fields = (self.lspace, self.esc, self.fpart, self.rest)
        return "LineInfo [%s|%s|%s|%s]" % fields
714 720
715 721
716 722 # Transformations of the special syntaxes that don't rely on an explicit escape
717 723 # character but instead on patterns on the input line
718 724
719 725 # The core transformations are implemented as standalone functions that can be
720 726 # tested and validated in isolation. Each of these uses a regexp, we
721 727 # pre-compile these and keep them close to each function definition for clarity
722 728
_assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                               r'\s*=\s*!\s*(?P<cmd>.*)')

def transform_assign_system(line):
    """Rewrite ``files = !ls`` style assignments into a magic call.

    Lines that do not have the ``<names> = !<command>`` shape are returned
    unchanged.
    """
    # FIXME: This transforms the line to use %sc, but we've listed that magic
    # as deprecated.  We should then implement this functionality in a
    # standalone api that we can transform to, without going through a
    # deprecated magic.
    found = _assign_system_re.match(line)
    if found is None:
        return line
    target, command = found.group('lhs', 'cmd')
    quoted = make_quoted_expr("sc -l = %s" % command)
    return '%s = get_ipython().magic(%s)' % (target, quoted)
740 746
741 747
_assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                              r'\s*=\s*%\s*(?P<cmd>.*)')

def transform_assign_magic(line):
    """Rewrite ``a = %who`` style assignments into a magic call.

    Lines that do not have the ``<names> = %<magic>`` shape are returned
    unchanged.
    """
    found = _assign_magic_re.match(line)
    if found is None:
        return line
    target, command = found.group('lhs', 'cmd')
    quoted = make_quoted_expr(command)
    return '%s = get_ipython().magic(%s)' % (target, quoted)
755 761
756 762
_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. )')

def transform_classic_prompt(line):
    """Strip a leading classic Python prompt ('>>> ' or '... ') from a line.

    Empty and whitespace-only lines, and lines without such a prompt, are
    returned unchanged.
    """
    if line and not line.isspace():
        found = _classic_prompt_re.match(line)
        if found:
            # The pattern is anchored at the start, so the match end is the
            # length of the prompt.
            return line[found.end():]
    return line
769 775
770 776
_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )')

def transform_ipy_prompt(line):
    """Strip a leading classic IPython prompt from a line.

    Both the 'In [N]: ' input prompt and the '   ...: ' continuation prompt
    are recognized.  Empty and whitespace-only lines, and lines without such
    a prompt, are returned unchanged.
    """
    if line and not line.isspace():
        found = _ipy_prompt_re.match(line)
        if found:
            # The pattern is anchored at the start, so the match end is the
            # length of the prompt.
            return line[found.end():]
    return line
785 791
786 792
class EscapedTransformer(object):
    """Class to transform lines that are explicitly escaped out.

    Instances are callables: they take one line of input and return one line,
    rewritten into a plain Python call when the line uses one of the escape
    prefixes registered in ``self.tr`` (or ends with the help escape).

    The ``_tr_*`` static methods do the actual translation.  Each receives a
    ``LineInfo``-like object and reads only its ``line``, ``lspace``, ``esc``,
    ``fpart`` and ``rest`` attributes.
    """

    def __init__(self):
        # Dispatch table: escape prefix -> translator for that prefix.
        self.tr = {
            ESC_SHELL: self._tr_system,
            ESC_SH_CAP: self._tr_system2,
            ESC_HELP: self._tr_help,
            ESC_HELP2: self._tr_help,
            ESC_MAGIC: self._tr_magic,
            ESC_QUOTE: self._tr_quote,
            ESC_QUOTE2: self._tr_quote2,
            ESC_PAREN: self._tr_paren,
        }

    # Support for syntax transformations that use explicit escapes typed by the
    # user at the beginning of a line
    @staticmethod
    def _tr_system(line_info):
        "Translate lines escaped with: !"
        # Drop leading whitespace, then every leading shell-escape character.
        cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
        return '%sget_ipython().system(%s)' % (line_info.lspace,
                                               make_quoted_expr(cmd))

    @staticmethod
    def _tr_system2(line_info):
        "Translate lines escaped with: !!"
        # Skip the two-character escape that follows the leading whitespace.
        cmd = line_info.line.lstrip()[2:]
        return '%sget_ipython().getoutput(%s)' % (line_info.lspace,
                                                  make_quoted_expr(cmd))

    @staticmethod
    def _tr_help(line_info):
        """Translate lines escaped with: ?/??

        The escape may be at the start or at the end of the line.  A line
        with nothing after its first character produces the usage screen
        call; otherwise a ``pinfo``/``pinfo2`` magic call is produced.
        """
        # A naked help line should just fire the intro help screen
        if not line_info.line[1:]:
            return 'get_ipython().show_usage()'

        # There may be one or two '?' at the end, move them to the front so that
        # the rest of the logic can assume escapes are at the start
        line = line_info.line
        if line.endswith('?'):
            line = line[-1] + line[:-1]
            if line.endswith('?'):
                line = line[-1] + line[:-1]
            line_info = LineInfo(line)

        # From here on, simply choose which level of detail to get.  Anything
        # other than the double escape gets the basic level; previously an
        # unexpected ``esc`` value left ``pinfo`` unbound and raised
        # UnboundLocalError when formatting the result.
        if line_info.esc == '??':
            pinfo = 'pinfo2'
        else:
            pinfo = 'pinfo'

        # NOTE(review): unlike the other translators, the argument is placed
        # between literal double quotes rather than going through
        # make_quoted_expr, so a double quote in the input is not escaped.
        tpl = '%sget_ipython().magic("%s %s")'
        target = ' '.join([line_info.fpart, line_info.rest]).strip()
        return tpl % (line_info.lspace, pinfo, target)

    @staticmethod
    def _tr_magic(line_info):
        "Translate lines escaped with: %"
        tpl = '%sget_ipython().magic(%s)'
        cmd = make_quoted_expr(' '.join([line_info.fpart,
                                         line_info.rest]).strip())
        return tpl % (line_info.lspace, cmd)

    @staticmethod
    def _tr_quote(line_info):
        "Translate lines escaped with: ,"
        # Every whitespace-separated word becomes its own quoted argument.
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               '", "'.join(line_info.rest.split()))

    @staticmethod
    def _tr_quote2(line_info):
        "Translate lines escaped with: ;"
        # The whole remainder of the line becomes one quoted argument.
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               line_info.rest)

    @staticmethod
    def _tr_paren(line_info):
        "Translate lines escaped with: /"
        # Whitespace-separated words become comma-separated, unquoted args.
        return '%s%s(%s)' % (line_info.lspace, line_info.fpart,
                             ", ".join(line_info.rest.split()))

    def __call__(self, line):
        """Class to transform lines that are explicitly escaped out.

        This calls the above _tr_* static methods for the actual line
        translations.

        Empty or whitespace-only lines, and lines with no recognized escape,
        are returned unmodified."""

        # Empty lines just get returned unmodified
        if not line or line.isspace():
            return line

        # Get line endpoints, where the escapes can be
        line_info = LineInfo(line)

        # If the escape is not at the start, only '?' needs to be special-cased.
        # All other escapes are only valid at the start
        if line_info.esc not in self.tr:
            if line.endswith(ESC_HELP):
                return self._tr_help(line_info)
            # If we don't recognize the escape, don't modify the line
            return line

        return self.tr[line_info.esc](line_info)
892 898
893 899
# A function-looking object to be used by the rest of the code: call it with
# one line of input and it returns the (possibly translated) line.  The purpose
# of the class in this case is to organize related functionality, more than to
# manage state.
transform_escaped = EscapedTransformer()
898 904
899 905
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    def push(self, lines):
        """Push one or more lines of IPython input.

        The input is split into individual lines; each one is optionally run
        through the IPython syntax transformers and then handed to the parent
        class's ``push``.  The value returned is whatever the parent returned
        for the last line pushed.
        """
        parent_push = super(IPythonInputSplitter, self).push

        if not lines:
            return parent_push(lines)

        pending = lines.splitlines()

        # Applied in this order to every line that is eligible for
        # transformation.
        transforms = (transform_escaped, transform_assign_system,
                      transform_assign_magic, transform_ipy_prompt,
                      transform_classic_prompt)

        # In 'block' mode with more than one line we pump the parent class by
        # hand, line by line: reset once, switch to 'line' mode for the
        # duration of the loop, and restore 'block' mode afterwards.
        restore_block_mode = len(pending) > 1 and self.input_mode == 'block'
        if restore_block_mode:
            self.reset()
            self.input_mode = 'line'

        try:
            for line in pending:
                # Transform logic
                #
                # Line transformers are applied only when there is no input
                # yet, the input so far is complete, or the last buffered line
                # ends with ':' (opening an indented block).  This prevents
                # the accidental transformation of escapes inside multiline
                # expressions like triple-quoted strings or parenthesized
                # expressions, while still transforming the first line of an
                # indented block.
                #
                # FIXME: try to find a cleaner approach for this last bit.
                if (self._is_complete or not self._buffer
                        or self._buffer[-1].rstrip().endswith(':')):
                    for transform in transforms:
                        line = transform(line)

                out = parent_push(line)
        finally:
            if restore_block_mode:
                self.input_mode = 'block'

        return out
General Comments 0
You need to be logged in to leave comments. Login now