##// END OF EJS Templates
Fix for unicode in inputsplitter.
Thomas Kluyver -
Show More
@@ -1,1021 +1,1023 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63 from __future__ import print_function
64 64
65 65 #-----------------------------------------------------------------------------
66 66 # Imports
67 67 #-----------------------------------------------------------------------------
68 68 # stdlib
69 69 import codeop
70 70 import re
71 71 import sys
72 72
73 73 # IPython modules
74 74 from IPython.utils.text import make_quoted_expr
75 75
76 76 #-----------------------------------------------------------------------------
77 77 # Globals
78 78 #-----------------------------------------------------------------------------
79 79
80 80 # The escape sequences that define the syntax transformations IPython will
81 81 # apply to user input. These can NOT be just changed here: many regular
82 82 # expressions and other parts of the code may use their hardcoded values, and
83 83 # for all intents and purposes they constitute the 'IPython syntax', so they
84 84 # should be considered fixed.
85 85
ESC_SHELL = '!' # Send line to underlying system shell
ESC_SH_CAP = '!!' # Send line to system shell and capture output
ESC_HELP = '?' # Find information about object
ESC_HELP2 = '??' # Find extra-detailed information about object
ESC_MAGIC = '%' # Call magic function
ESC_QUOTE = ',' # Split args on whitespace, quote each as a string, and call
ESC_QUOTE2 = ';' # Quote all args as a single string, then call
ESC_PAREN = '/' # Call first argument with rest of line as arguments
94 94
95 95 #-----------------------------------------------------------------------------
96 96 # Utilities
97 97 #-----------------------------------------------------------------------------
98 98
99 99 # FIXME: These are general-purpose utilities that later can be moved to the
100 100 # general ward. Kept here for now because we're being very strict about test
101 101 # coverage with this code, and this lets us ensure that we keep 100% coverage
102 102 # while developing.
103 103
# Compiled regexps for autoindent management.
#
# dedent_re recognizes an indented ``raise``, ``return`` or ``pass`` statement,
# i.e. a line after which the indentation level drops.  The trailing ``\b``
# ensures only the keywords themselves match, and not identifiers that merely
# start with one of them (``passthrough``, ``returned``, ``raiser`` ...).
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
# ini_spaces_re captures the run of leading blanks (spaces, tabs, etc., but
# not newlines) at the start of a string.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
comment_line_re = re.compile(r'^\s*\#')
111 111
112 112
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    Every character matched by ``ini_spaces_re`` counts as one, so a tab is
    counted as a single space.  Mixing tabs and spaces in the user's input is
    *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
133 133
134 134
def remove_comments(src):
    """Strip every Python comment from the given source text.

    Note: this is purely textual, comments are NOT recognized inside of
    strings!  Everything from a '#' up to the end of its line is dropped, even
    when the '#' is part of a string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
151 151
152 152
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when sys.stdin has no ``encoding`` attribute or when
    that attribute is None (there are strange environments where this
    happens), so that callers always get a usable encoding name.
    """
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
163 163
164 164 #-----------------------------------------------------------------------------
165 165 # Classes and functions for normal Python syntax handling
166 166 #-----------------------------------------------------------------------------
167 167
168 168 # HACK! This implementation, written by Robert K a while ago using the
169 169 # compiler module, is more robust than the other one below, but it expects its
170 170 # input to be pure python (no ipython syntax). For now we're using it as a
171 171 # second-pass splitter after the first pass transforms the input to pure
172 172 # python.
173 173
def split_blocks(python):
    """ Split multiple lines of code into discrete commands that can be
    executed singly.

    The input is parsed with the ``compiler`` module and cut at the starting
    line of every top-level statement.  If the input cannot be parsed, it is
    returned unchanged as a single-element list.

    Parameters
    ----------
    python : str
        Pure, exec'able Python code.

    Returns
    -------
    commands : list of str
        Separate commands that can be exec'ed independently.
    """

    import compiler

    # compiler.parse treats trailing spaces after a newline as a
    # SyntaxError.  This is different than codeop.CommandCompiler, which
    # will compile the trailing spaces just fine.  We simply strip any
    # trailing whitespace off.  Passing a string with trailing whitespace
    # to exec will fail however.  There seems to be some inconsistency in
    # how trailing whitespace is handled, but this seems to work.
    python_ori = python   # save original in case we bail on error
    python = python.strip()

    # The compiler module does not like unicode, so we need to encode it.
    if isinstance(python, unicode):
        # Use the utf-8-sig BOM so the compiler detects this as a UTF-8
        # encoded string.
        # NOTE(review): the commands returned below are sliced out of this
        # BOM-prefixed byte string, so for unicode input the first command
        # starts with the BOM and all commands are UTF-8 byte strings --
        # confirm that callers expect this.
        python = '\xef\xbb\xbf' + python.encode('utf-8')

    # The compiler module will parse the code into an abstract syntax tree.
    # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
    try:
        tree = compiler.parse(python)
    except Exception:
        # Unparseable input is handed back untouched as a single block.  Only
        # genuine errors are caught here: KeyboardInterrupt and SystemExit
        # must keep propagating.
        return [python_ori]

    # Uncomment to help debug the ast tree
    # for n in tree.node:
    #     print(n.lineno, '->', n)

    # Each separate command is available by iterating over tree.node.  The
    # lineno attribute is the line number (1-indexed) beginning the commands
    # suite.
    # lines ending with ";" yield a Discard Node that doesn't have a lineno
    # attribute.  These nodes can and should be discarded.  But there are
    # other situations that cause Discard nodes that shouldn't be discarded.
    # We might eventually discover other cases where lineno is None and have
    # to put in a more sophisticated test.
    linenos = [x.lineno-1 for x in tree.node if x.lineno is not None]

    # When we have a bare string as the first statement, it does not end up as
    # a Discard Node in the AST as we might expect.  Instead, it gets
    # interpreted as the docstring of the module.  Check for this case and
    # prepend 0 (the first line number) to the list of linenos to account for
    # it.
    if tree.doc is not None:
        linenos.insert(0, 0)

    # When we finally get the slices, we will need to slice all the way to
    # the end even though we don't have a line number for it.  Fortunately,
    # None does the job nicely.
    linenos.append(None)

    # Same problem at the other end: sometimes the ast tree has its
    # first complete statement not starting on line 0.  In this case
    # we might miss part of it.  This fixes ticket 266993.  Thanks Gael!
    linenos[0] = 0

    lines = python.splitlines()

    # Create a list of atomic commands: each one spans from the start line of
    # a statement up to (but excluding) the start line of the next one.
    cmds = []
    for i, j in zip(linenos[:-1], linenos[1:]):
        cmd = lines[i:j]
        if cmd:
            cmds.append('\n'.join(cmd)+'\n')

    return cmds
255 255
256 256
class InputSplitter(object):
    """An object that can split Python source input in executable blocks.

    This object is designed to be used in one of two basic modes:

    1. By feeding it python source line-by-line, using :meth:`push`. In this
       mode, it will return on each push whether the currently pushed code
       could be executed already. In addition, it provides a method called
       :meth:`push_accepts_more` that can be used to query whether more input
       can be pushed into a single interactive block.

    2. By calling :meth:`split_blocks` with a single, multiline Python string,
       that is then split into blocks each of which can be executed
       interactively as a single statement.

    This is a simple example of how an interactive terminal-based client can
    use this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset(), end='')
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'line'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of ['line', 'cell']; default is 'line'.

        The input_mode parameter controls how new inputs are used when fed via
        the :meth:`push` method:

        - 'line': meant for line-oriented clients, inputs are appended one at a
          time to the internal buffer and the whole buffer is compiled.

        - 'cell': meant for clients that can edit multi-line 'cells' of text at
          a time.  A cell can contain one or more blocks that can be compiled
          in 'single' mode by Python.  In this mode, each new input completely
          replaces all prior inputs.  Cell mode is thus equivalent to
          prepending a full reset() to every push() call.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Empty the list in place, so the same buffer object is kept.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note
          that this value is also stored as a private attribute
          (_is_complete), so it can be queried at any time.  When the source
          ends with a backslash continuation, False is returned right away and
          _is_complete is left as None.
        """
        if self.input_mode == 'cell':
            self.reset()

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if
        # an exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly
        if source.rstrip().endswith('\\'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not
            # have given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        if not self._is_complete:
            return True

        # If we already have complete input and we're flush left, the answer
        # depends.  In line mode, we're done.  But in cell mode, we need to
        # check how many blocks the input so far compiles into, because if
        # there's already more than one full independent block of input, then
        # the client has entered full 'cell' mode and is feeding lines that
        # each is complete.  In this case we should then keep accepting.
        # The Qt terminal-like console does precisely this, to provide the
        # convenience of terminal-like input of single expressions, but
        # allowing the user (with a separate keystroke) to switch to 'cell'
        # mode and type multiple expressions in one shot.
        if self.indent_spaces==0:
            if self.input_mode=='line':
                return False
            else:
                # This is the module-level split_blocks() function, not the
                # method of the same name.
                nblocks = len(split_blocks(''.join(self._buffer)))
                if nblocks==1:
                    return False

        # When input is complete, then termination is marked by an extra blank
        # line at the end.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks.

        Note: this method starts by performing a full reset().

        Parameters
        ----------
        lines : str
          A possibly multiline string.

        Returns
        -------
        blocks : list
          A list of strings, each possibly multiline.  Each string corresponds
          to a single block that can be compiled in 'single' mode (unless it
          has a syntax error)."""

        # This code is fairly delicate.  If you make any changes here, make
        # absolutely sure that you do run the full test suite and ALL tests
        # pass.

        self.reset()
        blocks = []

        # Reversed copy so we can use pop() efficiently and consume the input
        # as a stack
        lines = lines.splitlines()[::-1]
        # Outer loop over all input
        while lines:
            #print('Current lines:', lines)  # dbg
            # Inner loop to build each block
            while True:
                # Safety exit from inner loop
                if not lines:
                    break
                # Grab next line but don't push it yet
                next_line = lines.pop()
                # Blank/empty lines are pushed as-is
                if not next_line or next_line.isspace():
                    self.push(next_line)
                    continue

                # Check indentation changes caused by the *next* line
                indent_spaces, _full_dedent = self._find_indent(next_line)

                # If the next line causes a dedent, it can be for two
                # different reasons: either an explicit de-dent by the user or
                # a return/raise/pass statement.  These MUST be handled
                # separately:
                #
                # 1. the first case is only detected when the actual explicit
                # dedent happens, and that would be the *first* line of a *new*
                # block.  Thus, we must put the line back into the input buffer
                # so that it starts a new block on the next pass.
                #
                # 2. the second case is detected in the line before the actual
                # dedent happens, so we consume the line and we can break out
                # to start a new block.

                # Case 1, explicit dedent causes a break.
                # Note: check that we weren't on the very last line, else we'll
                # enter an infinite loop adding/removing the last line.
                if _full_dedent and lines and not next_line.startswith(' '):
                    lines.append(next_line)
                    break

                # Otherwise any line is pushed
                self.push(next_line)

                # Case 2, full dedent with full block ready:
                if _full_dedent or \
                       self.indent_spaces==0 and not self.push_accepts_more():
                    break
            # Form the new block with the current source input
            blocks.append(self.source_reset())

        #return blocks
        # HACK!!! Now that our input is in blocks but guaranteed to be pure
        # python syntax, feed it back a second time through the AST-based
        # splitter (the module-level split_blocks function), which is more
        # accurate than ours.
        return split_blocks(''.join(blocks))

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to
          self.indent_spaces if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                #print('Full dedent in text', self.source)  # dbg
                full_dedent = True

        # Ignore trailing whitespace when looking for the ':' that opens a new
        # block: _update_indent() strips comments before calling us, so a line
        # such as 'if x:  # note' arrives here as 'if x:  '.  Using endswith()
        # also keeps an empty line from raising IndexError.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0
            #print('safety')  # dbg

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from one or more lines of input.

        Comments are removed first; every remaining line that is neither empty
        nor pure whitespace is then run through :meth:`_find_indent`, and the
        result is stored in ``indent_spaces`` and ``_full_dedent``.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        Input that is not already unicode is decoded using self.encoding.  If
        input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          The input to store.
        buffer : list, optional
          List the input is appended to; defaults to self._buffer.
        store : str, optional
          Name of the attribute that receives the joined, encoded contents of
          the buffer.
        """
        if not isinstance(lines, unicode):
            lines = lines.decode(self.encoding)

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the joined contents of buffer, encoded with self.encoding."""
        return ''.join(buffer).encode(self.encoding)
618 620
619 621
620 622 #-----------------------------------------------------------------------------
621 623 # Functions and classes for IPython-specific syntactic support
622 624 #-----------------------------------------------------------------------------
623 625
# RegExp for splitting line contents into pre-char//first word-method//rest.
# For clarity, each group is on one line.

line_split = re.compile(r"""
             ^(\s*)              # any leading space
             ([,;/%]|!!?|\?\??)  # escape character or characters
             \s*(%?[\w\.\*]*)    # function/method, possibly with leading %
                                 # to correctly treat things like '?%magic'
             (\s+.*$|$)          # rest of line
             """, re.VERBOSE)


def split_user_input(line):
    """Split user input into early whitespace, esc-char, function part and rest.

    Lines that start with one of the escape characters are split with the
    ``line_split`` regexp.  Any other line is split at its first run of
    whitespace, with an empty escape.

    This currently handles lines with '=' in them in a very inconsistent
    manner.

    Parameters
    ----------
    line : string
      A single line of input.

    Returns
    -------
    A tuple ``(lspace, esc, fpart, rest)``; `fpart` is returned stripped and
    `rest` with its leading whitespace removed.  If `fpart` cannot be encoded
    as ASCII it is moved to the front of `rest` and returned empty.

    Examples
    ========
    >>> split_user_input('x=1')
    ('', '', 'x=1', '')
    >>> split_user_input('?')
    ('', '?', '', '')
    >>> split_user_input('??')
    ('', '??', '', '')
    >>> split_user_input(' ?')
    (' ', '?', '', '')
    >>> split_user_input(' ??')
    (' ', '??', '', '')
    >>> split_user_input('??x')
    ('', '??', 'x', '')
    >>> split_user_input('?x=1')
    ('', '', '?x=1', '')
    >>> split_user_input('!ls')
    ('', '!', 'ls', '')
    >>> split_user_input(' !ls')
    (' ', '!', 'ls', '')
    >>> split_user_input('!!ls')
    ('', '!!', 'ls', '')
    >>> split_user_input(' !!ls')
    (' ', '!!', 'ls', '')
    >>> split_user_input(',ls')
    ('', ',', 'ls', '')
    >>> split_user_input(';ls')
    ('', ';', 'ls', '')
    >>> split_user_input(' ;ls')
    (' ', ';', 'ls', '')
    >>> split_user_input('f.g(x)')
    ('', '', 'f.g(x)', '')
    >>> split_user_input('f.g (x)')
    ('', '', 'f.g', '(x)')
    >>> split_user_input('?%hist')
    ('', '?', '%hist', '')
    >>> split_user_input('?x*')
    ('', '?', 'x*', '')
    """
    match = line_split.match(line)
    if match:
        lspace, esc, fpart, rest = match.groups()
    else:
        # print("match failed for line '%s'" % line)
        try:
            fpart, rest = line.split(None, 1)
        except ValueError:
            # Fewer than two whitespace-separated fields: the whole line is
            # the function part.
            # print("split failed for line '%s'" % line)
            fpart, rest = line,''
        lspace = re.match(r'^(\s*)(.*)', line).groups()[0]
        esc = ''

    # fpart has to be a valid python identifier, so it better be only pure
    # ascii, no unicode:
    try:
        fpart = fpart.encode('ascii')
    except UnicodeEncodeError:
        lspace = unicode(lspace)
        rest = fpart + u' ' + rest
        fpart = u''

    #print('line:<%s>' % line)  # dbg
    #print('esc <%s> fpart <%s> rest <%s>' % (esc,fpart.strip(),rest))  # dbg
    return lspace, esc, fpart.strip(), rest.lstrip()
706 708
707 709
708 710 # The escaped translators ALL receive a line where their own escape has been
709 711 # stripped. Only '?' is valid at the end of the line, all others can only be
710 712 # placed at the start.
711 713
class LineInfo(object):
    """A single line of input and associated info.

    Utility class that wraps the tuple returned by :func:`split_user_input`
    in an object that is convenient to pass around during input
    transformations.

    The following attributes are available:

    line
      The original, raw line

    lspace
      Any early whitespace before actual text starts.

    esc
      The initial esc character (or characters, for double-char escapes like
      '??' or '!!').

    fpart
      The 'function part', which is basically the maximal initial sequence
      of valid python identifiers and the '.' character.  This is what is
      checked for alias and magic transformations, used for auto-calling,
      etc.

    rest
      Everything else on the line.
    """
    def __init__(self, line):
        """Split `line` and store the raw line along with its four parts."""
        self.line = line
        pieces = split_user_input(line)
        self.lspace, self.esc, self.fpart, self.rest = pieces

    def __str__(self):
        """Return the four parts of the line separated by '|'."""
        pieces = (self.lspace, self.esc, self.fpart, self.rest)
        return "LineInfo [%s|%s|%s|%s]" % pieces
748 750
749 751
750 752 # Transformations of the special syntaxes that don't rely on an explicit escape
751 753 # character but instead on patterns on the input line
752 754
# The core transformations are implemented as standalone functions that can be
# tested and validated in isolation. Each of these uses a regexp; we
# pre-compile these and keep them close to each function definition for clarity.
756 758
_assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                               r'\s*=\s*!\s*(?P<cmd>.*)')

def transform_assign_system(line):
    """Handle the `files = !ls` syntax.

    A line assigning the output of a shell command to one or more
    comma-separated names is rewritten as an assignment from
    ``get_ipython().getoutput(...)``.  Any other line is returned unchanged.
    """
    found = _assign_system_re.match(line)
    if found is None:
        return line
    quoted_cmd = make_quoted_expr(found.group('cmd'))
    return '%s = get_ipython().getoutput(%s)' % (found.group('lhs'),
                                                 quoted_cmd)
770 772
771 773
_assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                              r'\s*=\s*%\s*(?P<cmd>.*)')

def transform_assign_magic(line):
    """Handle the `a = %who` syntax.

    A line assigning the result of a magic to one or more comma-separated
    names is rewritten as an assignment from ``get_ipython().magic(...)``.
    Any other line is returned unchanged.
    """
    found = _assign_magic_re.match(line)
    if found is None:
        return line
    quoted_cmd = make_quoted_expr(found.group('cmd'))
    return '%s = get_ipython().magic(%s)' % (found.group('lhs'), quoted_cmd)
785 787
786 788
_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. )')

def transform_classic_prompt(line):
    """Handle inputs that start with '>>> ' syntax.

    A leading classic Python prompt ('>>> ' or '... ', optionally preceded by
    spaces or tabs) is stripped.  Empty and whitespace-only lines, as well as
    lines without a prompt, are returned unchanged.
    """
    if line and not line.isspace():
        prompt = _classic_prompt_re.match(line)
        if prompt:
            # The match is anchored at the start, so its end offset is the
            # length of the prompt.
            return line[prompt.end():]
    return line
799 801
800 802
_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )')

def transform_ipy_prompt(line):
    """Handle inputs that start with classic IPython prompt syntax.

    A leading 'In [N]: ' prompt or '   ...: ' continuation prompt (optionally
    preceded by spaces or tabs) is stripped.  Empty and whitespace-only lines,
    as well as lines without a prompt, are returned unchanged.
    """
    if line and not line.isspace():
        prompt = _ipy_prompt_re.match(line)
        if prompt:
            # The match is anchored at the start, so its end offset is the
            # length of the prompt.
            return line[prompt.end():]
    return line
815 817
816 818
class EscapedTransformer(object):
    """Class to transform lines that are explicitly escaped out.

    Instances are callable: given a single line of input they return either
    the equivalent plain-Python call or, when no known escape is present, the
    line itself.  The escape -> handler table is built in ``__init__`` and
    kept on ``self.tr``; the handlers themselves are static methods that take
    a ``LineInfo``-like object and return the translated line.
    """

    def __init__(self):
        # Dispatch table: escape string (as reported by LineInfo.esc) ->
        # static method that performs the translation.
        self.tr = {ESC_SHELL: self._tr_system,
                   ESC_SH_CAP: self._tr_system2,
                   ESC_HELP: self._tr_help,
                   ESC_HELP2: self._tr_help,
                   ESC_MAGIC: self._tr_magic,
                   ESC_QUOTE: self._tr_quote,
                   ESC_QUOTE2: self._tr_quote2,
                   ESC_PAREN: self._tr_paren}

    # Support for syntax transformations that use explicit escapes typed by the
    # user at the beginning of a line
    @staticmethod
    def _tr_system(line_info):
        """Translate lines escaped with: !"""
        # Drop leading whitespace, then every leading ESC_SHELL character.
        cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
        return '%sget_ipython().system(%s)' % (line_info.lspace,
                                               make_quoted_expr(cmd))

    @staticmethod
    def _tr_system2(line_info):
        """Translate lines escaped with: !!"""
        # Drop leading whitespace, then the two-character escape itself.
        cmd = line_info.line.lstrip()[2:]
        return '%sget_ipython().getoutput(%s)' % (line_info.lspace,
                                                  make_quoted_expr(cmd))

    @staticmethod
    def _tr_help(line_info):
        """Translate lines escaped with: ?/??

        A line consisting of a single character yields the usage screen
        call.  Otherwise the result is a ``get_ipython().magic(...)`` call to
        ``psearch`` (when the line contains ``*``), ``pinfo2`` (when the
        escape is ``??``) or ``pinfo``.
        """
        # A naked help line should just fire the intro help screen
        if not line_info.line[1:]:
            return 'get_ipython().show_usage()'

        # There may be one or two '?' at the end, move them to the front so
        # that the rest of the logic can assume escapes are at the start.
        line = line_info.line
        if line.endswith('?'):
            for _ in range(2):
                if not line.endswith('?'):
                    break
                line = line[-1] + line[:-1]
            # Re-parse so esc/fpart/rest reflect the rotated line.
            line_info = LineInfo(line)

        # From here on, simply choose which level of detail to get, and
        # special-case the psearch syntax
        if '*' in line_info.line:
            pinfo = 'psearch'
        elif line_info.esc == '??':
            pinfo = 'pinfo2'
        else:
            pinfo = 'pinfo'

        target = ' '.join([line_info.fpart, line_info.rest]).strip()
        return '%sget_ipython().magic("%s %s")' % (line_info.lspace, pinfo,
                                                   target)

    @staticmethod
    def _tr_magic(line_info):
        """Translate lines escaped with: %"""
        magic_line = ' '.join([line_info.fpart, line_info.rest]).strip()
        return '%sget_ipython().magic(%s)' % (line_info.lspace,
                                              make_quoted_expr(magic_line))

    @staticmethod
    def _tr_quote(line_info):
        """Translate lines escaped with: ,

        Each whitespace-separated word of ``rest`` becomes its own
        double-quoted argument of the call to ``fpart``.
        """
        args = '", "'.join(line_info.rest.split())
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart, args)

    @staticmethod
    def _tr_quote2(line_info):
        """Translate lines escaped with: ;

        The whole of ``rest`` becomes a single double-quoted argument of the
        call to ``fpart``.
        """
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               line_info.rest)

    @staticmethod
    def _tr_paren(line_info):
        """Translate lines escaped with: /

        Each whitespace-separated word of ``rest`` becomes an (unquoted)
        argument of the call to ``fpart``.
        """
        args = ", ".join(line_info.rest.split())
        return '%s%s(%s)' % (line_info.lspace, line_info.fpart, args)

    def __call__(self, line):
        """Class to transform lines that are explicitly escaped out.

        This calls the above _tr_* static methods for the actual line
        translations.  Empty or whitespace-only lines, and lines with no
        recognized escape, are returned unmodified.
        """
        # Empty lines just get returned unmodified
        if not line or line.isspace():
            return line

        # Get line endpoints, where the escapes can be
        line_info = LineInfo(line)

        # If the escape is not at the start, only '?' needs to be special-cased.
        # All other escapes are only valid at the start
        if line_info.esc not in self.tr:
            if line.endswith(ESC_HELP):
                return self._tr_help(line_info)
            # If we don't recognize the escape, don't modify the line
            return line

        return self.tr[line_info.esc](line_info)
925 927
926 928
# A function-looking object to be used by the rest of the code: call it as
# transform_escaped(line) to get the translated line back.  The purpose of the
# class in this case is to organize related functionality, more than to
# manage state.
transform_escaped = EscapedTransformer()
931 933
932 934
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax.

    In addition to what the parent class does, this class records the raw,
    untransformed text that was pushed (``source_raw``) and runs the
    module-level line transformers over incoming lines before passing them on
    to ``InputSplitter.push``.
    """

    # String with raw, untransformed input.
    source_raw = ''

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None):
        InputSplitter.__init__(self, input_mode)
        self._buffer_raw = []

    def reset(self):
        """Reset the input buffer and associated state."""
        InputSplitter.reset(self)
        # Clear in place so the same list object is kept.
        self._buffer_raw[:] = []
        self.source_raw = ''

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.

        Returns a ``(source, source_raw)`` tuple holding the values the two
        attributes had just before the reset.
        """
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def push(self, lines):
        """Push one or more lines of IPython input.

        Byte strings are decoded with ``self.encoding`` first.  The text is
        then split into lines, each line is (conditionally) run through the
        syntax transformers and fed to the parent class' ``push``.  The value
        returned is whatever the parent's ``push`` returned for the last
        line; falsy input is forwarded to the parent unchanged.
        """
        if not lines:
            return super(IPythonInputSplitter, self).push(lines)

        # We must ensure all input is pure unicode.  ``bytes`` is the same
        # object as ``str`` on Python 2; isinstance (rather than an exact
        # type comparison) also covers byte-string subclasses.
        if isinstance(lines, bytes):
            lines = lines.decode(self.encoding)

        lines_list = lines.splitlines()

        transforms = [transform_escaped, transform_assign_system,
                      transform_assign_magic, transform_ipy_prompt,
                      transform_classic_prompt]

        # Transform logic
        #
        # We only apply the line transformers to the input if we have either no
        # input yet, or complete input, or if the last line of the buffer ends
        # with ':' (opening an indented block).  This prevents the accidental
        # transformation of escapes inside multiline expressions like
        # triple-quoted strings or parenthesized expressions.
        #
        # The last heuristic, while ugly, ensures that the first line of an
        # indented block is correctly transformed.
        #
        # FIXME: try to find a cleaner approach for this last bit.

        # If we were in 'cell' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        was_cell_mode = self.input_mode == 'cell'
        if was_cell_mode:
            self.reset()
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            push = super(IPythonInputSplitter, self).push
            for line in lines_list:
                # State is re-read for every line because each push() below
                # may change it.
                if (self._is_complete or not self._buffer
                        or self._buffer[-1].rstrip().endswith(':')):
                    for f in transforms:
                        line = f(line)

                out = push(line)
        finally:
            # Restore the caller's mode even if a transformer or push raised.
            if was_cell_mode:
                self.input_mode = 'cell'
        return out
General Comments 0
You need to be logged in to leave comments. Login now