##// END OF EJS Templates
add strip_encoding_cookie transformer...
MinRK -
Show More
@@ -1,657 +1,659 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10
11 11 For more details, see the class docstring below.
12 12
13 13 Syntax Transformations
14 14 ----------------------
15 15
16 16 One of the main jobs of the code in this file is to apply all syntax
17 17 transformations that make up 'the IPython language', i.e. magics, shell
18 18 escapes, etc. All transformations should be implemented as *fully stateless*
19 19 entities, that simply take one line as their input and return a line.
20 20 Internally for implementation purposes they may be a normal function or a
21 21 callable object, but the only input they receive will be a single line and they
22 22 should only return a line, without holding any data-dependent state between
23 23 calls.
24 24
25 25 As an example, the EscapedTransformer is a class so we can more clearly group
26 26 together the functionality of dispatching to individual functions based on the
27 27 starting escape character, but the only method for public use is its call
28 28 method.
29 29
30 30
31 31 ToDo
32 32 ----
33 33
34 34 - Should we make push() actually raise an exception once push_accepts_more()
35 35 returns False?
36 36
37 37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 38 at least just attributes of a class so not really very exposed.
39 39
40 40 - Think about the best way to support dynamic things: automagic, autocall,
41 41 macros, etc.
42 42
43 43 - Think of a better heuristic for the application of the transforms in
44 44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 45 track indentation change events (indent, dedent, nothing) and apply them only
46 46 if the indentation went up, but not otherwise.
47 47
48 48 - Think of the cleanest way for supporting user-specified transformations (the
49 49 user prefilters we had before).
50 50
51 51 Authors
52 52 -------
53 53
54 54 * Fernando Perez
55 55 * Brian Granger
56 56 """
57 57 #-----------------------------------------------------------------------------
58 58 # Copyright (C) 2010 The IPython Development Team
59 59 #
60 60 # Distributed under the terms of the BSD License. The full license is in
61 61 # the file COPYING, distributed as part of this software.
62 62 #-----------------------------------------------------------------------------
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Imports
66 66 #-----------------------------------------------------------------------------
67 67 # stdlib
68 68 import ast
69 69 import codeop
70 70 import re
71 71 import sys
72 72
73 73 # IPython modules
74 74 from IPython.utils.py3compat import cast_unicode
75 75 from IPython.core.inputtransformer import (leading_indent,
76 76 classic_prompt,
77 77 ipy_prompt,
78 strip_encoding_cookie,
78 79 cellmagic,
79 80 assemble_logical_lines,
80 81 help_end,
81 82 escaped_commands,
82 83 assign_from_magic,
83 84 assign_from_system,
84 85 assemble_python_lines,
85 86 )
86 87
87 88 # These are available in this module for backwards compatibility.
88 89 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
89 90 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
90 91 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
91 92
92 93 #-----------------------------------------------------------------------------
93 94 # Utilities
94 95 #-----------------------------------------------------------------------------
95 96
96 97 # FIXME: These are general-purpose utilities that later can be moved to the
97 98 # general ward. Kept here for now because we're being very strict about test
98 99 # coverage with this code, and this lets us ensure that we keep 100% coverage
99 100 # while developing.
100 101
# Compiled regexps for autoindent management.
# dedent_re matches an *indented* line holding a statement after which the
# enclosing block cannot continue (raise/return/pass/break/continue).
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$', # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$', # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$', # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$', # wacky return with immediate open paren
    r'^\s+pass\s*$', # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$', # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$', # continue (optionally followed by trailing spaces)
]))
# Leading run of whitespace, excluding newlines.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Raw string: '\s' and '\#' are regex escapes, not
# Python string escapes.
comment_line_re = re.compile(r'^\s*\#')
116 117
117 118
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading run of space, tab, carriage-return, form-feed
        and vertical-tab characters; 0 if the string does not start with one.
    """
    # Same character class as the module-level ``ini_spaces_re``; a newline
    # deliberately does not count as indentation.
    return len(s) - len(s.lstrip(' \t\r\f\v'))
138 139
def last_blank(src):
    """Determine if the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      True if the last line of `src` is empty or whitespace-only; False
      otherwise, including for an empty `src`.
    """
    if not src:
        return False
    last_line = src.splitlines()[-1]
    return (last_line == '') or last_line.isspace()
152 153
153 154
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)

def last_two_blanks(src):
    """Determine if the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      True if `src` ends in two blank lines, False otherwise (including for
      an empty `src`).
    """
    if not src:
        return False
    # The logic here is tricky: I couldn't get a regexp to work and pass all
    # the tests, so I took a different approach: split the source by lines,
    # grab the last two and prepend '###\n' as a stand-in for whatever was in
    # the body before the last two lines.  Then, with that structure, it's
    # possible to analyze with two regexps.  Not the most elegant solution, but
    # it works.  If anyone tries to change this logic, make sure to validate
    # the whole test suite first!
    new_src = '\n'.join(['###\n'] + src.splitlines()[-2:])
    return (bool(last_two_blanks_re.match(new_src)) or
            bool(last_two_blanks_re2.match(new_src)))
178 179
179 180
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!  Everything from a
    '#' to the end of its line is dropped, even when the '#' is part of a
    string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # '.' does not match newlines, so line structure is preserved.
    return re.sub('#.*', '', src)
196 197
197 198
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # There are strange environments for which sys.stdin.encoding is None (or
    # where sys.stdin has no such attribute at all).  We ensure that a valid
    # encoding is returned.
    encoding = getattr(sys.stdin, 'encoding', None)
    if encoding is None:
        encoding = 'ascii'
    return encoding
208 209
209 210 #-----------------------------------------------------------------------------
210 211 # Classes and functions for normal Python syntax handling
211 212 #-----------------------------------------------------------------------------
212 213
class InputSplitter(object):
    """An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so any outside reference to the list stays valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        self._update_indent(lines)
        try:
            self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            #print("Not complete")  # debug
            return True

        # The user can make any (complete) input execute by leaving a blank line
        last_line = self.source.splitlines()[-1]
        if (not last_line) or last_line.isspace():
            #print("Blank line")  # debug
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.indent_spaces == 0:
            if len(self.source.splitlines()) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                #print("Can't parse AST")  # debug
                return False
            else:
                # A node without a 'body' attribute is a simple statement.
                if len(code_ast.body) == 1 and \
                   not hasattr(code_ast.body[0], 'body'):
                    #print("Simple statement")  # debug
                    return False

        # General fallback - accept more code
        return True

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
          if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                #print 'Full dedent in text',self.source # dbg
                full_dedent = True

        # endswith() rather than [-1]: an unexpectedly blank line must not
        # raise IndexError here.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0
            #print 'safety' # dbg

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update indentation state from each non-blank, comment-stripped line."""
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List to append to; defaults to ``self._buffer``.
        store : str, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Return the buffer's entries joined into a single unicode string."""
        return u''.join(buffer)
458 459
459 460
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                    logical_line_transforms=None, python_line_transforms=None):
        """Create a splitter, optionally overriding the transformer lists.

        Parameters
        ----------
        line_input_checker : bool
          Passed to the default ``cellmagic`` transformer as
          ``end_on_blank_line``.
        physical_line_transforms, logical_line_transforms,
        python_line_transforms : list, optional
          Transformer instances for each stage; when None the defaults
          built below are used.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [leading_indent(),
                                             classic_prompt(),
                                             ipy_prompt(),
                                             strip_encoding_cookie(),
                                            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [cellmagic(end_on_blank_line=line_input_checker),
                                            help_end(),
                                            escaped_commands(),
                                            assign_from_magic(),
                                            assign_from_system(),
                                           ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False
        for t in self.transforms:
            t.reset()

    def flush_transformers(self):
        """Drain input still held by the transformers into the source buffer.

        Walks the transformers currently in use in order: whatever an earlier
        one gives back is pushed into the next, each one is reset to release
        what it was holding, and any final output is stored.
        """
        def _flush(transform, out):
            if out is not None:
                tmp = transform.push(out)
                return tmp or transform.reset() or None
            else:
                return transform.reset() or None

        out = None
        for t in self.transforms_in_use:
            out = _flush(t, out)

        if out is not None:
            self._store(out)

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.
        """
        self.flush_transformers()
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def source_reset(self):
        """Flush the transformers, then return the source and fully reset."""
        self.flush_transformers()
        return super(IPythonInputSplitter, self).source_reset()

    def push_accepts_more(self):
        """Return True while a transformer is accumulating; otherwise defer to
        :meth:`InputSplitter.push_accepts_more`."""
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        # lines_list is never empty here, so `out` is always bound; the
        # status of the last pushed line is the overall result.
        for line in lines_list:
            out = self.push_line(line)

        return out

    def push_line(self, line):
        """Run one physical line through the transformer pipeline.

        Returns False (and sets ``transformer_accumulating``) as soon as a
        transformer returns None for the line; otherwise hands the transformed
        line to :meth:`InputSplitter.push` and returns its result.
        """
        def _accumulating(dbg):
            #print(dbg)
            self.transformer_accumulating = True
            return False

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        # Logical-line handling is skipped while assemble_python_lines is
        # still collecting a multi-line Python statement.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        #print("transformers clear") #debug
        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
@@ -1,447 +1,472 b''
1 1 import abc
2 2 import functools
3 3 import re
4 4 from StringIO import StringIO
5 5
6 6 from IPython.core.splitinput import LineInfo
7 7 from IPython.utils import tokenize2
8 from IPython.utils.openpy import cookie_comment_re
8 9 from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError
9 10
10 11 #-----------------------------------------------------------------------------
11 12 # Globals
12 13 #-----------------------------------------------------------------------------
13 14
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# No backslash continuations needed inside the brackets.
ESC_SEQUENCES = [ESC_SHELL, ESC_SH_CAP, ESC_HELP,
                 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
                 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN]
33 34
34 35
class InputTransformer(object):
    """Abstract base class for line-based input transformers."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def push(self, line):
        """Send a line of input to the transformer, returning the transformed
        input or None if the transformer is waiting for more input.

        Must be overridden by subclasses.
        """
        pass

    @abc.abstractmethod
    def reset(self):
        """Return, transformed, any lines that the transformer has accumulated,
        and reset its internal state.

        Must be overridden by subclasses.
        """
        pass

    @classmethod
    def wrap(cls, func):
        """Can be used by subclasses as a decorator, to return a factory that
        will allow instantiation with the decorated object.

        The factory accepts keyword arguments only and forwards them to the
        class constructor after `func`.
        """
        @functools.wraps(func)
        def transformer_factory(**kwargs):
            return cls(func, **kwargs)

        return transformer_factory
67 68
class StatelessInputTransformer(InputTransformer):
    """Wrapper for a stateless input transformer implemented as a function."""
    def __init__(self, func):
        # func: callable taking one line and returning the transformed line.
        self.func = func

    def __repr__(self):
        return "StatelessInputTransformer(func={0!r})".format(self.func)

    def push(self, line):
        """Send a line of input to the transformer, returning the
        transformed input."""
        return self.func(line)

    def reset(self):
        """No-op - exists for compatibility."""
        pass
84 85
class CoroutineInputTransformer(InputTransformer):
    """Wrapper for an input transformer implemented as a coroutine."""
    def __init__(self, coro, **kwargs):
        # Prime it: advance the generator to its first ``yield`` so that it
        # can accept values through send().
        self.coro = coro(**kwargs)
        next(self.coro)

    def __repr__(self):
        return "CoroutineInputTransformer(coro={0!r})".format(self.coro)

    def push(self, line):
        """Send a line of input to the transformer, returning the
        transformed input or None if the transformer is waiting for more
        input.
        """
        return self.coro.send(line)

    def reset(self):
        """Return, transformed, any lines that the transformer has
        accumulated, and reset its internal state.

        Implemented by sending None into the coroutine.
        """
        return self.coro.send(None)
107 108
class TokenInputTransformer(InputTransformer):
    """Wrapper for a token-based input transformer.

    func should accept a list of tokens (5-tuples, see tokenize docs), and
    return an iterable which can be passed to tokenize.untokenize().
    """
    def __init__(self, func):
        self.func = func
        # Text accumulated so far for the statement being assembled.
        self.current_line = ""
        # True once the tokenizer has consumed current_line (see get_line).
        self.line_used = False
        self.reset_tokenizer()

    def reset_tokenizer(self):
        """Start a fresh tokenizer that reads through :meth:`get_line`."""
        self.tokenizer = generate_tokens(self.get_line)

    def get_line(self):
        """Readline callback for the tokenizer.

        Hands out the accumulated text once; a second request before new input
        arrives raises TokenError, which push() treats as "need more input".
        """
        if self.line_used:
            raise TokenError
        self.line_used = True
        return self.current_line

    def push(self, line):
        """Add a line; return the transformed statement, or None if the
        statement is not complete yet."""
        self.current_line += line + "\n"
        if self.current_line.isspace():
            return self.reset()

        self.line_used = False
        tokens = []
        stop_at_NL = False
        try:
            for intok in self.tokenizer:
                tokens.append(intok)
                t = intok[0]
                if t == tokenize2.NEWLINE or (stop_at_NL and t == tokenize2.NL):
                    # Stop before we try to pull a line we don't have yet
                    break
                elif t == tokenize2.ERRORTOKEN:
                    stop_at_NL = True
        except TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        return self.output(tokens)

    def output(self, tokens):
        """Clear accumulated state and return func's tokens as source text,
        without trailing newlines."""
        self.current_line = ""
        self.reset_tokenizer()
        return untokenize(self.func(tokens)).rstrip('\n')

    def reset(self):
        """Clear accumulated state; return the pending text with trailing
        newlines removed, or None if nothing was pending."""
        l = self.current_line
        self.current_line = ""
        self.reset_tokenizer()
        if l:
            return l.rstrip('\n')
164 165
class assemble_python_lines(TokenInputTransformer):
    """Token-based transformer that only joins lines: once the tokenizer has
    seen a complete statement, the accumulated text is returned unchanged."""
    def __init__(self):
        # No token-rewriting function is needed; output() never calls it.
        super(assemble_python_lines, self).__init__(None)

    def output(self, tokens):
        # Ignore the tokens and hand back the raw accumulated text.
        return self.reset()
171 172
@CoroutineInputTransformer.wrap
def assemble_logical_lines():
    r"""Join lines following explicit line continuations (\)"""
    line = ''
    while True:
        line = (yield line)
        if not line or line.isspace():
            continue

        parts = []
        # A None sent in (a reset) ends the accumulation early.
        while line is not None:
            if line.endswith('\\') and (not has_comment(line)):
                # Drop the trailing backslash and wait for the continuation.
                parts.append(line[:-1])
                line = (yield None) # Get another line
            else:
                parts.append(line)
                break

        # Output
        line = ''.join(parts)
192 193
193 194 # Utilities
def _make_help_call(target, esc, lspace, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)

    Parameters
    ----------
    target : str
      Name to look up; a '*' in it selects ``psearch``.
    esc : str
      The help escape; '??' selects ``pinfo2``.
    lspace : str
      Text prefixed to the generated call.
    next_input : str, optional
      If given, a ``set_next_input`` call with this text is emitted before
      the magic call.

    Returns
    -------
    str
      Source text of the generated call.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    arg = " ".join([method, target])
    if next_input is None:
        return '%sget_ipython().magic(%r)' % (lspace, arg)
    else:
        return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
           (lspace, next_input, arg)
206 207
207 208 # These define the transformations for the different escape characters.
def _tr_system(line_info):
    "Translate lines escaped with: !"
    # Drop leading whitespace, then every leading '!' character.
    cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
    return '%sget_ipython().system(%r)' % (line_info.pre, cmd)
212 213
def _tr_system2(line_info):
    """Translate lines escaped with: !!

    The first two characters after any leading whitespace are dropped and
    the rest is wrapped in a ``get_ipython().getoutput(...)`` call.
    """
    stripped = line_info.line.lstrip()
    command = stripped[2:]
    return '%sget_ipython().getoutput(%r)' % (line_info.pre, command)
217 218
def _tr_help(line_info):
    """Translate lines escaped with: ?/??

    A line with nothing after its first character produces a call to
    ``show_usage()``; anything else is delegated to ``_make_help_call``.
    """
    if line_info.line[1:]:
        return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)

    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
225 226
def _tr_magic(line_info):
    """Translate lines escaped with: %

    ``ifun`` and ``the_rest`` are joined with a single space, stripped, and
    passed as the argument of ``get_ipython().magic(...)``.
    """
    command = (line_info.ifun + ' ' + line_info.the_rest).strip()
    return '%sget_ipython().magic(%r)' % (line_info.pre, command)
231 232
def _tr_quote(line_info):
    """Translate lines escaped with: ,

    Each whitespace-separated word of ``the_rest`` becomes its own
    double-quoted argument in a call to ``ifun``.
    """
    words = line_info.the_rest.split()
    quoted_args = '", "'.join(words)
    return '%s%s("%s")' % (line_info.pre, line_info.ifun, quoted_args)
236 237
def _tr_quote2(line_info):
    """Translate lines escaped with: ;

    The whole of ``the_rest`` is passed to ``ifun`` as one double-quoted
    argument.
    """
    template = '%s%s("%s")'
    fields = (line_info.pre, line_info.ifun, line_info.the_rest)
    return template % fields
241 242
def _tr_paren(line_info):
    """Translate lines escaped with: /

    The whitespace-separated words of ``the_rest`` become the comma-separated
    (unquoted) arguments of a call to ``ifun``.
    """
    arguments = ", ".join(line_info.the_rest.split())
    return '%s%s(%s)' % (line_info.pre, line_info.ifun, arguments)
246 247
# Dispatch table: escape prefix -> translator taking a line-info object and
# returning the equivalent Python source.  Both help escapes share a handler.
tr = {
    ESC_SHELL: _tr_system,
    ESC_SH_CAP: _tr_system2,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
255 256
@StatelessInputTransformer.wrap
def escaped_commands(line):
    """Transform escaped commands - %magic, !system, ?help + various autocalls.

    Empty or whitespace-only lines, and lines whose escape is not a key of
    ``tr``, are returned unchanged.
    """
    if line and not line.isspace():
        info = LineInfo(line)
        handler = tr.get(info.esc)
        if handler is not None:
            return handler(info)
    return line
267 268
# Matches the (possibly empty) run of whitespace at the start of a line.
_initial_space_re = re.compile(r'\s*')

# Matches a dotted name (optionally prefixed by up to two '%', and allowing
# '*' wildcards) followed by '?' or '??' at the very end of the string.
# Group 1 is the name, group 3 the question marks.
_help_end_re = re.compile(
    r"""(%{0,2}
 [a-zA-Z_*][\w*]* # Variable name
 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
 )
 (\?\??)$ # ? or ??""",
    flags=re.VERBOSE)
276 277
def has_comment(src):
    """Report whether an input line contains (ends in, or is) a comment.

    The line is tokenized, so a ``#`` inside a string literal is not mistaken
    for a comment.  A ``TokenError`` raised part-way through is ignored; only
    the tokens produced before it are considered.

    Parameters
    ----------
    src : string
      A single line input string.

    Returns
    -------
    comment : bool
        True if source has a comment.
    """
    readline = StringIO(src).readline
    found = False
    try:
        # Every token is consumed (no early exit) before the answer is given.
        for token in generate_tokens(readline):
            if token[0] == tokenize2.COMMENT:
                found = True
    except TokenError:
        pass
    return found
300 301
301 302
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end

    Lines that do not match ``_help_end_re`` or that contain a comment are
    returned unchanged.
    """
    match = _help_end_re.search(line)
    if match is None or has_comment(line):
        return line

    target, esc = match.group(1), match.group(3)
    lspace = _initial_space_re.match(line).group(0)

    if line.strip() == match.group(0):
        # The help request is the whole line: nothing to restore afterwards.
        next_input = None
    else:
        # If we're mid-command, put it back on the next prompt for the user.
        next_input = line.rstrip('?')

    return _make_help_call(target, esc, lspace, next_input)
316 317
317 318
@CoroutineInputTransformer.wrap
def cellmagic(end_on_blank_line=False):
    """Captures & transforms cell magics.

    After a cell magic is started, this stores up any lines it gets until it is
    reset (sent None).  If ``end_on_blank_line`` is true, a blank
    (whitespace-only) line also ends the cell.

    Lines sent before a cell magic starts are yielded back unchanged; while a
    cell is being collected ``None`` is yielded; once it ends, a
    ``get_ipython().run_cell_magic(name, first_line_args, body)`` call is
    yielded.
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    # Raw string: '\w' and '\?' are regex escapes, not string escapes.
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        if (not line) or (not line.startswith(ESC_MAGIC2)):
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        first = line
        body = []
        line = (yield None)
        while (line is not None) and \
                ((line.strip() != '') or not end_on_blank_line):
            body.append(line)
            line = (yield None)

        # Output: split "%%name args" into the magic name and its arguments.
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
349 350
350 351
def _strip_prompts(prompt_re):
    """Remove matching input prompts from a block of input.

    Generator-based coroutine: lines are sent in and the (possibly stripped)
    line is yielded back.  Sending ``None`` marks the end of a cell.

    The first two lines of each cell always have one match of ``prompt_re``
    removed.  Later lines are stripped only if a prompt was found in one of
    those first two lines; otherwise they are passed through untouched.
    """
    line = ''
    while True:
        line = (yield line)

        # First line of cell
        if line is None:
            continue
        first_out, first_hits = prompt_re.subn('', line, count=1)
        line = (yield first_out)

        # Second line of cell: people often copy from just after the first
        # prompt, so the prompt may only show up here.
        if line is None:
            continue
        second_out, second_hits = prompt_re.subn('', line, count=1)
        line = (yield second_out)

        prompts_seen = bool(first_hits or second_hits)
        # Rest of the cell, until reset.
        while line is not None:
            if prompts_seen:
                line = (yield prompt_re.sub('', line, count=1))
            else:
                line = (yield line)
380 381
@CoroutineInputTransformer.wrap
def classic_prompt():
    """Strip the >>>/... prompts of the Python interactive shell."""
    # FIXME: non-capturing version (?:...) usable?
    return _strip_prompts(re.compile(r'^(>>> ?|\.\.\. ?)'))
387 388
@CoroutineInputTransformer.wrap
def ipy_prompt():
    """Strip IPython's In [1]:/...: prompts."""
    # FIXME: non-capturing version (?:...) usable?
    # FIXME: r'^(In \[\d+\]: | {3}\.{3,}: )' clearer?
    return _strip_prompts(re.compile(r'^(In \[\d+\]: |\ \ \ \.\.\.+: )'))
395 396
396 397
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that same whitespace prefix
    is removed from every following line that begins with it, until the
    transformer is reset (sent None).  If the first line is not indented, all
    lines are passed through unchanged until reset.
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)

        if line is None:
            continue

        match = space_re.match(line)
        # None means "first line had no indentation": pass everything through.
        prefix = match.group(0) if match else None
        while line is not None:
            if prefix is not None and line.startswith(prefix):
                line = line[len(prefix):]
            line = (yield line)
423 424
424 425
@CoroutineInputTransformer.wrap
def strip_encoding_cookie():
    """Remove encoding comment if found in first two lines

    If the first or second line matches ``cookie_comment_re`` (e.g. the
    `# coding: utf-8` comment), an empty string is yielded in its place.
    All later lines of the cell are passed through unchanged until reset
    (sent None).
    """
    line = ''
    while True:
        line = (yield line)

        # Only the first two lines of a cell are checked for the cookie.
        remaining = 2
        while remaining and line is not None:
            remaining -= 1
            replacement = "" if cookie_comment_re.match(line) else line
            line = (yield replacement)

        # no-op on the rest of the cell
        while line is not None:
            line = (yield line)
448
449
# lhs: one or more comma-separated dotted names; cmd: everything after "= !".
assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                              r'\s*=\s*!\s*(?P<cmd>.*)')
assign_system_template = '%s = get_ipython().getoutput(%r)'


@StatelessInputTransformer.wrap
def assign_from_system(line):
    """Transform assignment from system commands (e.g. files = !ls)

    Lines that do not match ``assign_system_re`` are returned unchanged.
    """
    match = assign_system_re.match(line)
    if match is not None:
        return assign_system_template % match.group('lhs', 'cmd')
    return line
436 461
# lhs: one or more comma-separated dotted names; cmd: everything after "= %".
assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                             r'\s*=\s*%\s*(?P<cmd>.*)')
assign_magic_template = '%s = get_ipython().magic(%r)'


@StatelessInputTransformer.wrap
def assign_from_magic(line):
    """Transform assignment from magic commands (e.g. a = %who_ls)

    Lines that do not match ``assign_magic_re`` are returned unchanged.
    """
    match = assign_magic_re.match(line)
    if match is not None:
        return assign_magic_template % match.group('lhs', 'cmd')
    return line
General Comments 0
You need to be logged in to leave comments. Login now