##// END OF EJS Templates
Prototype transformer to assemble logical lines
Thomas Kluyver -
Show More
@@ -0,0 +1,125 b''
1 """This is a patched copy of the untokenize machinery from the standard library.
2
3 untokenize has a number of major bugs that render it almost useless. We're using
4 the patch written by Gareth Rees on Python issue 12691:
5
6 http://bugs.python.org/issue12691
7
8 We've undone one part of the patch - it encoded the output to bytes, to neatly
9 round-trip from tokenize. We want to keep working with text, so we don't encode.
10 """
11
12 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
13 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
14 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
15 'Michael Foord')
16 from token import *
17
18
19 from tokenize import COMMENT, NL
20
try:
    # Python 3
    from tokenize import ENCODING
except ImportError:
    # This tokenize module has no ENCODING token (Python 2). Use a sentinel
    # far outside the range of real token type numbers so the comparisons
    # against ENCODING below simply never match.
    ENCODING = 987654321
26
class Untokenizer:
    """Rebuild source text from a stream of tokens.

    The pieces of output text are accumulated in ``self.tokens``;
    ``prev_row``/``prev_col`` track the position at which the previously
    emitted token ended, so that the gap before the next token can be filled
    with whitespace.  ``encoding`` records the value of the last ENCODING
    token seen (it is not used to encode the output).
    """

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0
        self.encoding = 'utf-8'

    def add_whitespace(self, tok_type, start):
        """Append the whitespace that separates the previous token from this one.

        Parameters
        ----------
        tok_type : int
            Type of the token about to be emitted.
        start : tuple
            ``(row, col)`` position at which that token starts.

        Raises
        ------
        ValueError
            If ``start`` lies on a row before the end of the previous token.
        """
        row, col = start
        # An explicit check rather than ``assert``: asserts vanish under
        # ``python -O``, after which out-of-order tokens would silently
        # produce wrong output.
        if row < self.prev_row:
            raise ValueError("start (%d,%d) precedes previous end (%d,%d)"
                             % (row, col, self.prev_row, self.prev_col))
        col_offset = col - self.prev_col
        if col_offset > 0:
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        """Convert an iterable of tokens back into a source string.

        Tokens are expected to be sequences of at least four items
        (``type``, ``string``, ``start``, ``end``).  As soon as a 2-item token
        is met, the rest of the stream (that token included) is handed to
        :meth:`compat`, which works without position information.

        Returns the concatenation of everything accumulated in
        ``self.tokens`` as a text string (not encoded).
        """
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            # IPython modification - valid Python 2 syntax
            tok_type, token, start, end = t[:4]
            if tok_type == ENCODING:
                # Remember the declared encoding, but emit nothing for it.
                self.encoding = token
                continue
            self.add_whitespace(tok_type, start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                # The next token is expected at column 0 of the following row.
                self.prev_row += 1
                self.prev_col = 0
        # IPython modification - don't encode output
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Emit tokens that carry no position information.

        ``token`` is the first token to process and ``iterable`` supplies the
        remaining ones; only the first two items (type, string) of each token
        are used.  Spacing is synthesised: a space is appended after every
        NAME and NUMBER, a space is inserted between adjacent STRING tokens,
        and the innermost INDENT string is re-emitted at the start of each
        line inside an indented block.
        """
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False
        prevstring = False
        indents = []
        toks_append = self.tokens.append

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
104
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code.  Return
    a text string.  Unlike the standard library function this was copied
    from, the result is *not* encoded to bytes: an ENCODING token found in
    ``tokens`` is recorded by the :class:`Untokenizer` but otherwise ignored.

    The result is intended to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured.  The
    guarantee applies only to the token type and token string as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes.  If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.

    A fresh :class:`Untokenizer` is created for every call, so no state is
    shared between calls.
    """
    return Untokenizer().untokenize(tokens)
@@ -1,652 +1,651 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Imports
66 66 #-----------------------------------------------------------------------------
67 67 # stdlib
68 68 import ast
69 69 import codeop
70 70 import re
71 71 import sys
72 72
73 73 # IPython modules
74 74 from IPython.core.splitinput import split_user_input, LineInfo
75 75 from IPython.utils.py3compat import cast_unicode
76 76 from IPython.core.inputtransformer import (leading_indent,
77 77 classic_prompt,
78 78 ipy_prompt,
79 79 cellmagic,
80 assemble_logical_lines,
80 81 help_end,
81 82 escaped_transformer,
82 83 assign_from_magic,
83 84 assign_from_system,
84 85 )
85 86
86 87 # Temporary!
87 88 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
88 89 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
89 90 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
90 91
91 92 #-----------------------------------------------------------------------------
92 93 # Utilities
93 94 #-----------------------------------------------------------------------------
94 95
95 96 # FIXME: These are general-purpose utilities that later can be moved to the
96 97 # general ward. Kept here for now because we're being very strict about test
97 98 # coverage with this code, and this lets us ensure that we keep 100% coverage
98 99 # while developing.
99 100
100 101 # compiled regexps for autoindent management
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$', # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$', # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$', # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$', # wacky return with immediate open paren
    r'^\s+pass\s*$', # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$', # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$', # continue (optionally followed by trailing spaces)
]))
# Leading run of whitespace characters (spaces, tabs, etc., but not newlines).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
# (raw string: '\s' and '\#' are invalid escape sequences in a plain literal)
comment_line_re = re.compile(r'^\s*\#')
115 116
116 117
def num_ini_spaces(s):
    """Count the whitespace characters at the start of a string.

    Every character matched by ``ini_spaces_re`` counts as one, so a tab is
    counted as a single space.  Mixing tabs and spaces in user input is *not*
    supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, 0 if there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
137 138
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A blank is either an empty line or a line consisting only of whitespace.
    Empty input is not considered to end in a blank.

    Parameters
    ----------
    src : string
      A single or multiline string.
    """
    if src:
        final_line = src.splitlines()[-1]
        return final_line == '' or final_line.isspace()
    return False
151 152
152 153
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)

def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.
    """
    if not src:
        return False
    # A single regexp that passes the whole test suite proved elusive, so the
    # check works on a reduced string instead: the last two lines of the
    # source, preceded by '###\n' standing in for whatever came before them.
    # That reduced string is then tried against both regexps.  Not the most
    # elegant solution, but it works; validate against the full test suite
    # before changing this logic.
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
177 178
178 179
def remove_comments(src):
    """Strip every Python comment from the input source.

    Everything from a ``#`` to the end of its line is dropped.
    Note: strings are NOT recognized, so a ``#`` inside a string literal is
    treated as the start of a comment too!

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
195 196
196 197
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when sys.stdin has no encoding."""
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    # Some unusual environments provide a sys.stdin whose encoding is None
    # (or missing altogether); always hand back a usable encoding name.
    return 'ascii' if stdin_encoding is None else stdin_encoding
207 208
208 209 #-----------------------------------------------------------------------------
209 210 # Classes and functions for normal Python syntax handling
210 211 #-----------------------------------------------------------------------------
211 212
class InputSplitter(object):
    """An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode: 'line' or 'cell' (see __init__ for what each one means).
    input_mode = 'line'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of ['line', 'cell']; default is 'line'.

       The input_mode parameter controls how new inputs are used when fed via
       the :meth:`push` method:

       - 'line': meant for line-oriented clients, inputs are appended one at a
         time to the internal buffer and the whole buffer is compiled.

       - 'cell': meant for clients that can edit multi-line 'cells' of text at
          a time.  A cell can contain one or more blocks that can be compiled
          in 'single' mode by Python.  In this mode, each new input
          completely replaces all prior inputs.  Cell mode is thus equivalent
          to prepending a full reset() to every push() call.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        # None means "use the class-level default".
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so the same list object keeps being used.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        If the accumulated source ends with a backslash-newline continuation,
        no compilation is attempted and False is returned.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (``_is_complete``), so it
        can be queried at any time.
        """
        if self.input_mode == 'cell':
            self.reset()

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        if not self._is_complete:
            return True

        # If we already have complete input and we're flush left, the answer
        # depends.  In line mode, if there hasn't been any indentation,
        # that's it. If we've come back from some indentation, we need
        # the blank final line to finish.
        # In cell mode, we need to check how many blocks the input so far
        # compiles into, because if there's already more than one full
        # independent block of input, then the client has entered full
        # 'cell' mode and is feeding lines that each is complete.  In this
        # case we should then keep accepting. The Qt terminal-like console
        # does precisely this, to provide the convenience of terminal-like
        # input of single expressions, but allowing the user (with a
        # separate keystroke) to switch to 'cell' mode and type multiple
        # expressions in one shot.
        if self.indent_spaces==0:
            if self.input_mode=='line':
                if not self._full_dedent:
                    return False
            else:
                try:
                    code_ast = ast.parse(u''.join(self._buffer))
                except Exception:
                    # Unparseable input: do not ask for more.
                    return False
                else:
                    if len(code_ast.body) == 1:
                        return False

        # When input is complete, then termination is marked by an extra blank
        # line at the end.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.
          (The last character of ``line.rstrip()`` is inspected, so the line
          must not be empty or all whitespace.)

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
        if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        # A line indented less than the current level dedents to its own level.
        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                #print 'Full dedent in text',self.source # dbg
                full_dedent = True

        # A trailing ':' opens a block; a statement matched by dedent_re
        # (raise, return, pass, break, continue) closes one.
        if line.rstrip()[-1] == ':':
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0
            #print 'safety' # dbg

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update ``indent_spaces`` and ``_full_dedent`` from the given input.

        Comments are stripped first; each remaining line that is neither empty
        nor all whitespace is passed through :meth:`_find_indent` in order.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        ``buffer`` is the list the input is appended to (``self._buffer`` when
        None); ``store`` is the name of the attribute that receives the joined
        contents of that buffer."""

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the contents of ``buffer`` joined into one unicode string."""
        return u''.join(buffer)
490 491
491 492
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None, transforms=None):
        """Create a new IPythonInputSplitter instance.

        Parameters
        ----------
        input_mode : str
          Passed on to :class:`InputSplitter`.

        transforms : list, optional
          Transformer objects applied, in order, to every pushed line.  Each
          must provide ``push(line)`` and ``reset()``.  When None, the default
          chain of IPython transformers is used.
        """
        super(IPythonInputSplitter, self).__init__(input_mode)
        self._buffer_raw = []
        self._validate = True
        if transforms is not None:
            self.transforms = transforms
        else:
            self.transforms = [leading_indent(),
                               classic_prompt(),
                               ipy_prompt(),
                               cellmagic(),
                               assemble_logical_lines(),
                               help_end(),
                               escaped_transformer(),
                               assign_from_magic(),
                               assign_from_system(),
                              ]

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        for t in self.transforms:
            t.reset()

    def flush_transformers(self):
        """Reset every transformer and store whatever input they still held.

        Each transformer's ``reset()`` is called in order; if any of them
        return a non-empty value, the last such value is stored as source.
        """
        out = None
        for t in self.transforms:
            tmp = t.reset()
            if tmp:
                out = tmp
        if out:
            self._store(out)

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.
        """
        self.flush_transformers()
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def source_reset(self):
        """Flush the transformers, then return the source and fully reset."""
        self.flush_transformers()
        return super(IPythonInputSplitter, self).source_reset()

    def push_accepts_more(self):
        """Return whether more input can be pushed.

        Always True while a transformer is still accumulating lines;
        otherwise the decision is left to :class:`InputSplitter`.
        """
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (_is_complete), so it
        can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Transform logic
        #
        # Every line is passed through the whole transformer chain by
        # push_line(); no heuristic based on the state of the buffer is
        # applied here to decide whether a line should be transformed.

        # If we were in 'cell' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        changed_input_mode = False

        if self.input_mode == 'cell':
            self.reset()
            changed_input_mode = True
            saved_input_mode = 'cell'
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            # lines_list is never empty, so ``out`` is always bound below.
            for line in lines_list:
                out = self.push_line(line)
        finally:
            if changed_input_mode:
                self.input_mode = saved_input_mode

        return out

    def push_line(self, line):
        """Run one line through the transformer chain and push the result.

        Each transformer receives the output of the previous one.  If a
        transformer returns None it is holding on to the line: the
        ``transformer_accumulating`` flag is set and False is returned without
        pushing anything to :class:`InputSplitter`.  Otherwise the fully
        transformed line is pushed and that push's result is returned.
        """
        for transformer in self.transforms:
            line = transformer.push(line)
            if line is None:
                self.transformer_accumulating = True
                return False

        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
@@ -1,441 +1,454 b''
1 1 import abc
2 2 import functools
3 3 import re
4 4 from StringIO import StringIO
5 5 import tokenize
6 6
# Python 2 exposes generate_tokens directly.  Where it is missing (Python 3)
# we fall back to the undocumented _tokenize, because it expects strings,
# not bytes.  See also Python issue #9969.
generate_tokens = getattr(tokenize, 'generate_tokens', None)
if generate_tokens is None:
    generate_tokens = tokenize._tokenize
13
7 14 from IPython.core.splitinput import split_user_input, LineInfo
15 from IPython.utils.untokenize import untokenize
8 16
9 17 #-----------------------------------------------------------------------------
10 18 # Globals
11 19 #-----------------------------------------------------------------------------
12 20
13 21 # The escape sequences that define the syntax transformations IPython will
14 22 # apply to user input. These can NOT be just changed here: many regular
15 23 # expressions and other parts of the code may use their hardcoded values, and
16 24 # for all intents and purposes they constitute the 'IPython syntax', so they
17 25 # should be considered fixed.
18 26
ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every escape sequence, in the order the constants are declared above.
ESC_SEQUENCES = [
    ESC_SHELL, ESC_SH_CAP, ESC_HELP,
    ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
    ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
]
32 40
33 41
class InputTransformer(object):
    """Abstract base class for line-based input transformers."""
    __metaclass__ = abc.ABCMeta

    # Set this to True to allow the transformer to act on lines inside strings.
    look_in_string = False

    @abc.abstractmethod
    def push(self, line):
        """Feed one line of input to the transformer.

        Returns the transformed input, or None if the transformer is waiting
        for more input.  Must be overridden by subclasses.
        """

    @abc.abstractmethod
    def reset(self):
        """Return, transformed, any lines the transformer has accumulated,
        and reset its internal state.

        Must be overridden by subclasses.
        """

    @classmethod
    def wrap(cls, func):
        """Decorator for subclasses: return a zero-argument factory that
        instantiates the class with the decorated object.

        Setting ``look_in_string = True`` on the returned factory makes the
        instances it creates carry ``look_in_string = True`` as well.
        """
        @functools.wraps(func)
        def transformer_factory():
            instance = cls(func)
            # The flag is read at call time, so it may be set on the factory
            # after decoration.
            wants_strings = getattr(transformer_factory, 'look_in_string', False)
            if wants_strings:
                instance.look_in_string = True
            return instance

        return transformer_factory
72 80
class StatelessInputTransformer(InputTransformer):
    """Adapter that turns a plain ``line -> line`` function into a transformer."""

    def __init__(self, func):
        # func is called once per pushed line.
        self.func = func

    def __repr__(self):
        return "StatelessInputTransformer(func={!r})".format(self.func)

    def push(self, line):
        """Return the result of applying the wrapped function to ``line``."""
        transformed = self.func(line)
        return transformed

    def reset(self):
        """No-op - exists for compatibility."""
        return None
89 97
class CoroutineInputTransformer(InputTransformer):
    """Adapter that drives a generator-based coroutine as a transformer."""

    def __init__(self, coro):
        # Create the coroutine and prime it, advancing it to its first
        # ``yield`` so that it is ready to receive lines via send().
        self.coro = coro()
        next(self.coro)

    def __repr__(self):
        return "CoroutineInputTransformer(coro={!r})".format(self.coro)

    def push(self, line):
        """Send ``line`` into the coroutine and return what it yields back.

        The result is the transformed input, or None if the transformer is
        waiting for more input.
        """
        result = self.coro.send(line)
        return result

    def reset(self):
        """Send None into the coroutine and return what it yields back.

        This returns, transformed, any lines the transformer has
        accumulated, and resets its internal state.
        """
        flushed = self.coro.send(None)
        return flushed
112 120
class TokenInputTransformer(InputTransformer):
    """Wrapper for a token-based input transformer.

    func should accept a list of tokens (5-tuples, see tokenize docs), and
    return an iterable which can be passed to untokenize().
    """

    def __init__(self, func):
        self.func = func
        # Text pushed so far that has not yet been emitted.
        self.current_line = ""
        # True once the tokenizer has been handed current_line.
        self.line_used = False
        self.reset_tokenizer()

    def reset_tokenizer(self):
        """Start a fresh tokenizer that reads its input from get_line()."""
        self.tokenizer = generate_tokens(self.get_line)

    def get_line(self):
        """Readline callback for the tokenizer.

        Hands out ``current_line`` once; a second request raises
        ``tokenize.TokenError`` so the tokenizer cannot pull input that has
        not been pushed yet.
        """
        if self.line_used:
            raise tokenize.TokenError
        self.line_used = True
        return self.current_line

    def push(self, line):
        """Add ``line`` to the pending text and try to tokenize it.

        Returns None if tokenizing raises ``tokenize.TokenError`` (the
        statement is not finished yet); otherwise passes the tokens through
        ``func`` and returns the untokenized text without trailing newlines.
        """
        self.current_line += line + "\n"
        self.line_used = False
        tokens = []
        try:
            for intok in self.tokenizer:
                tokens.append(intok)
                if intok[0] in (tokenize.NEWLINE, tokenize.NL):
                    # Stop before we try to pull a line we don't have yet
                    break
        except tokenize.TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        self.current_line = ""
        self.reset_tokenizer()
        return untokenize(self.func(tokens)).rstrip('\n')

    def reset(self):
        """Discard the pending text, returning it (minus trailing newlines)
        if there was any; otherwise return None.
        """
        pending = self.current_line
        self.current_line = ""
        if pending:
            return pending.rstrip('\n')
156 166
@TokenInputTransformer.wrap
def assemble_logical_lines(tokens):
    """Return the tokens unchanged.

    No rewriting is done here; the function exists to be wrapped by
    TokenInputTransformer.
    """
    return tokens
157 170
158 171 # Utilities
159 172 def _make_help_call(target, esc, lspace, next_input=None):
160 173 """Prepares a pinfo(2)/psearch call from a target name and the escape
161 174 (i.e. ? or ??)"""
162 175 method = 'pinfo2' if esc == '??' \
163 176 else 'psearch' if '*' in target \
164 177 else 'pinfo'
165 178 arg = " ".join([method, target])
166 179 if next_input is None:
167 180 return '%sget_ipython().magic(%r)' % (lspace, arg)
168 181 else:
169 182 return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
170 183 (lspace, next_input, arg)
171 184
@CoroutineInputTransformer.wrap
def escaped_transformer():
    """Translate lines beginning with one of IPython's escape characters.

    This is stateful to allow magic commands etc. to be continued over several
    lines using explicit line continuations (\\ at the end of a line).
    """

    # One translator per escape character; each takes a LineInfo and returns
    # the replacement source line.
    def _tr_system(line_info):
        "Translate lines escaped with: !"
        command = line_info.line.lstrip().lstrip(ESC_SHELL)
        return '%sget_ipython().system(%r)' % (line_info.pre, command)

    def _tr_system2(line_info):
        "Translate lines escaped with: !!"
        command = line_info.line.lstrip()[2:]
        return '%sget_ipython().getoutput(%r)' % (line_info.pre, command)

    def _tr_help(line_info):
        "Translate lines escaped with: ?/??"
        if line_info.line[1:]:
            return _make_help_call(line_info.ifun, line_info.esc,
                                   line_info.pre)
        # A naked help line should just fire the intro help screen
        return 'get_ipython().show_usage()'

    def _tr_magic(line_info):
        "Translate lines escaped with: %"
        command = ' '.join([line_info.ifun, line_info.the_rest]).strip()
        return '%sget_ipython().magic(%r)' % (line_info.pre, command)

    def _tr_quote(line_info):
        "Translate lines escaped with: ,"
        quoted_args = '", "'.join(line_info.the_rest.split())
        return '%s%s("%s")' % (line_info.pre, line_info.ifun, quoted_args)

    def _tr_quote2(line_info):
        "Translate lines escaped with: ;"
        return '%s%s("%s")' % (line_info.pre, line_info.ifun,
                               line_info.the_rest)

    def _tr_paren(line_info):
        "Translate lines escaped with: /"
        call_args = ", ".join(line_info.the_rest.split())
        return '%s%s(%s)' % (line_info.pre, line_info.ifun, call_args)

    tr = {
        ESC_SHELL: _tr_system,
        ESC_SH_CAP: _tr_system2,
        ESC_HELP: _tr_help,
        ESC_HELP2: _tr_help,
        ESC_MAGIC: _tr_magic,
        ESC_QUOTE: _tr_quote,
        ESC_QUOTE2: _tr_quote2,
        ESC_PAREN: _tr_paren,
    }

    line = ''
    while True:
        line = (yield line)
        # Blank lines and lines without a known escape pass through as-is.
        if not line or line.isspace():
            continue
        if LineInfo(line).esc not in tr:
            continue

        # Gather backslash-continued lines; None (a reset) ends the
        # collection early.
        pieces = [line.rstrip('\\')]
        while line.endswith('\\'):
            line = (yield None)
            if line is None:
                break
            pieces.append(line.rstrip('\\'))

        # Output
        lineinf = LineInfo(' '.join(pieces))
        line = tr[lineinf.esc](lineinf)
248 261
249 262 _initial_space_re = re.compile(r'\s*')
250 263
251 264 _help_end_re = re.compile(r"""(%{0,2}
252 265 [a-zA-Z_*][\w*]* # Variable name
253 266 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
254 267 )
255 268 (\?\??)$ # ? or ??""",
256 269 re.VERBOSE)
257 270
def has_comment(src):
    """Tell whether an input line contains a comment.

    The line is run through tokenize, so a # inside a string is not mistaken
    for a comment.

    Parameters
    ----------
    src : string
      A single line input string.

    Returns
    -------
    Boolean: True if source has a comment.
    """
    seen_types = set()
    token_stream = tokenize.generate_tokens(StringIO(src).readline)
    try:
        # Token types gathered before a TokenError are kept, so a comment on
        # an otherwise incomplete line is still detected.
        seen_types.update(tok[0] for tok in token_stream)
    except tokenize.TokenError:
        pass
    return tokenize.COMMENT in seen_types
280 293
281 294
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end"""
    match = _help_end_re.search(line)
    # Leave the line alone when there is no trailing ?/?? or when it
    # contains a comment.
    if match is None or has_comment(line):
        return line

    target, esc = match.group(1, 3)
    indent = _initial_space_re.match(line).group(0)

    # If we're mid-command, put it back on the next prompt for the user.
    if line.strip() == match.group(0):
        next_input = None
    else:
        next_input = line.rstrip('?')

    return _make_help_call(target, esc, indent, next_input)
296 309
297 310
@CoroutineInputTransformer.wrap
def cellmagic():
    """Captures & transforms cell magics.

    After a cell magic is started (a line beginning with %%), this stores up
    the lines it gets until it is reset (sent None) or receives a blank
    line, then yields a single run_cell_magic() call.
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    # Raw string: '\w' and '\?' are regex escapes, not string escapes.
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        if (not line) or (not line.startswith(ESC_MAGIC2)):
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        first = line
        body = []
        line = (yield None)
        # Accumulate the cell body until a reset (None) or a blank line.
        while (line is not None) and (line.strip() != ''):
            body.append(line)
            line = (yield None)

        # Output: split the first line into the magic name and the rest.
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
328 341
329 342
330 343 def _strip_prompts(prompt1_re, prompt2_re):
331 344 """Remove matching input prompts from a block of input."""
332 345 line = ''
333 346 while True:
334 347 line = (yield line)
335 348
336 349 if line is None:
337 350 continue
338 351
339 352 m = prompt1_re.match(line)
340 353 if m:
341 354 while m:
342 355 line = (yield line[len(m.group(0)):])
343 356 if line is None:
344 357 break
345 358 m = prompt2_re.match(line)
346 359 else:
347 360 # Prompts not in input - wait for reset
348 361 while line is not None:
349 362 line = (yield line)
350 363
351 364 @CoroutineInputTransformer.wrap
352 365 def classic_prompt():
353 366 """Strip the >>>/... prompts of the Python interactive shell."""
354 367 prompt1_re = re.compile(r'^(>>> )')
355 368 prompt2_re = re.compile(r'^(>>> |^\.\.\. )')
356 369 return _strip_prompts(prompt1_re, prompt2_re)
357 370
358 371 classic_prompt.look_in_string = True
359 372
360 373 @CoroutineInputTransformer.wrap
361 374 def ipy_prompt():
362 375 """Strip IPython's In [1]:/...: prompts."""
363 376 prompt1_re = re.compile(r'^In \[\d+\]: ')
364 377 prompt2_re = re.compile(r'^(In \[\d+\]: |^\ \ \ \.\.\.+: )')
365 378 return _strip_prompts(prompt1_re, prompt2_re)
366 379
367 380 ipy_prompt.look_in_string = True
368 381
369 382
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that starts with it, until the
    transformer is reset.
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)
        if line is None:
            continue

        # The first line fixes the prefix to strip.  With no leading
        # whitespace the prefix is empty, so stripping it is a no-op and
        # lines simply pass through until the reset.
        found = space_re.match(line)
        prefix = found.group(0) if found else ''
        while line is not None:
            if line.startswith(prefix):
                line = line[len(prefix):]
            line = (yield line)

# Allow this transformer to act on lines inside strings.
leading_indent.look_in_string = True
398 411
399 412
400 413 def _special_assignment(assignment_re, template):
401 414 """Transform assignment from system & magic commands.
402 415
403 416 This is stateful so that it can handle magic commands continued on several
404 417 lines.
405 418 """
406 419 line = ''
407 420 while True:
408 421 line = (yield line)
409 422 if not line or line.isspace():
410 423 continue
411 424
412 425 m = assignment_re.match(line)
413 426 if not m:
414 427 continue
415 428
416 429 parts = []
417 430 while line is not None:
418 431 parts.append(line.rstrip('\\'))
419 432 if not line.endswith('\\'):
420 433 break
421 434 line = (yield None)
422 435
423 436 # Output
424 437 whole = assignment_re.match(' '.join(parts))
425 438 line = template % (whole.group('lhs'), whole.group('cmd'))
426 439
427 440 @CoroutineInputTransformer.wrap
428 441 def assign_from_system():
429 442 """Transform assignment from system commands (e.g. files = !ls)"""
430 443 assignment_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
431 444 r'\s*=\s*!\s*(?P<cmd>.*)')
432 445 template = '%s = get_ipython().getoutput(%r)'
433 446 return _special_assignment(assignment_re, template)
434 447
435 448 @CoroutineInputTransformer.wrap
436 449 def assign_from_magic():
437 450 """Transform assignment from magic commands (e.g. a = %who_ls)"""
438 451 assignment_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
439 452 r'\s*=\s*%\s*(?P<cmd>.*)')
440 453 template = '%s = get_ipython().magic(%r)'
441 454 return _special_assignment(assignment_re, template)
General Comments 0
You need to be logged in to leave comments. Login now