##// END OF EJS Templates
Mark inputsplitter & inputtransformer as deprecated
Thomas Kluyver -
Show More
@@ -1,766 +1,768 b''
1 """Input handling and transformation machinery.
1 """DEPRECATED: Input handling and transformation machinery.
2
3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
2 4
3 5 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 6 input from a line-oriented frontend is complete and should be executed, and when
5 7 the user should be prompted for another line of code instead. The name 'input
6 8 splitter' is largely for historical reasons.
7 9
8 10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 11 with full support for the extended IPython syntax (magics, system calls, etc).
10 12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 14 and stores the results.
13 15
14 16 For more details, see the class docstrings below.
15 17 """
16 18
17 19 # Copyright (c) IPython Development Team.
18 20 # Distributed under the terms of the Modified BSD License.
19 21 import ast
20 22 import codeop
21 23 import io
22 24 import re
23 25 import sys
24 26 import tokenize
25 27 import warnings
26 28
27 29 from IPython.utils.py3compat import cast_unicode
28 30 from IPython.core.inputtransformer import (leading_indent,
29 31 classic_prompt,
30 32 ipy_prompt,
31 33 cellmagic,
32 34 assemble_logical_lines,
33 35 help_end,
34 36 escaped_commands,
35 37 assign_from_magic,
36 38 assign_from_system,
37 39 assemble_python_lines,
38 40 )
39 41
40 42 # These are available in this module for backwards compatibility.
41 43 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
42 44 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
43 45 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
44 46
45 47 #-----------------------------------------------------------------------------
46 48 # Utilities
47 49 #-----------------------------------------------------------------------------
48 50
49 51 # FIXME: These are general-purpose utilities that later can be moved to the
50 52 # general ward. Kept here for now because we're being very strict about test
51 53 # coverage with this code, and this lets us ensure that we keep 100% coverage
52 54 # while developing.
53 55
# Compiled regexps for autoindent management.
# dedent_re matches an *indented* line holding a statement that normally ends
# a block (raise/return/pass/break/continue).
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$',  # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',  # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',  # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',  # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',  # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',  # continue (optionally followed by trailing spaces)
]))
# Leading run of horizontal whitespace (newline deliberately excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
# NOTE: raw string, so that '\s' and '\#' reach the regex engine untouched
# instead of being treated as (invalid) string escapes.
comment_line_re = re.compile(r'^\s*\#')
69 71
70 72
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Count of leading characters drawn from space, tab, carriage return,
        form feed and vertical tab.  A newline is *not* counted.
    """
    # Same character set as the module-level ``ini_spaces_re`` pattern; each
    # leading whitespace character contributes exactly 1 to the count.
    return len(s) - len(s.lstrip(' \t\r\f\v'))
91 93
# Fake token types for partial_tokens(): they are numbered just past the real
# token types so they can never collide with one.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1

# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token for an unterminated (multi-line) string literal.

    Attributes mirror ``tokenize.TokenInfo``: ``type``/``exact_type``,
    ``string`` (also available as ``s``), ``start``, ``end`` and ``line``.
    """
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s = s
        self.start = start
        self.end = end
        self.line = line

    @property
    def string(self):
        # TokenInfo-compatible spelling of ``s``.
        return self.s


class InMultilineStatement:
    """Pseudo-token marking that the code ends inside an open statement.

    It has no text of its own: ``s``/``string`` is empty and ``start`` and
    ``end`` are the same position.
    """
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        self.s = ''
        self.start = self.end = pos
        self.line = line

    @property
    def string(self):
        # TokenInfo-compatible spelling of ``s``.
        return self.s
112 114
def partial_tokens(s):
    """Iterate over tokens from a possibly-incomplete string of code.

    This adds two special token types: INCOMPLETE_STRING and
    IN_MULTILINE_STATEMENT. These can only occur as the last token yielded, and
    represent the two main ways for code to be incomplete.

    Parameters
    ----------
    s : string
        Source code, possibly ending in the middle of a string or statement.

    Yields
    ------
    tokenize.TokenInfo, or a trailing IncompleteString / InMultilineStatement.

    Raises
    ------
    tokenize.TokenError
        Re-raised when the tokenizer fails for any reason other than the two
        recognised "EOF in multi-line ..." conditions.
    """
    readline = io.StringIO(s).readline
    # Placeholder so ``token.end`` is defined even if the tokenizer fails
    # before producing a single real token.
    token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError as e:
        # catch EOF error
        lines = s.splitlines(keepends=True)
        end = len(lines), len(lines[-1])
        message = e.args[0]
        if 'multi-line string' in message:
            # Everything after the last good token is the unfinished string.
            row, col = start = token.end
            remainder = lines[row-1][col:] + ''.join(lines[row:])
            yield IncompleteString(remainder, start, end, lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(end, lines[-1])
        else:
            raise
137 139
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : string
        The (possibly incomplete) source typed so far.

    Returns
    -------
    n : int
        Suggested indentation, in spaces, for the line following ``code``.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()
    if not tokens:
        return 0
    # Trailing dedents/newlines/comments say nothing about the last statement.
    # Guard on ``tokens``: the list may consist solely of such tokens.
    while tokens and tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE,
                                         tokenize.COMMENT}:
        tokens.pop()
    if not tokens:
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]
    def _add_indent(n):
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # After a non-logical newline, the column of the next token is the
            # indentation in effect; note this consumes that token.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        if len(tokens) > 1 and tokens[-2].exact_type in {
                tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
197 199
198 200
def last_blank(src):
    """Determine if the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        True when the last line of ``src`` is empty or whitespace-only;
        False otherwise, including for empty input.
    """
    if not src:
        return False
    ll = src.splitlines()[-1]
    return (ll == '') or ll.isspace()
212 214
213 215
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)

def last_two_blanks(src):
    """Determine if the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        True when either pattern matches the normalised tail of ``src``;
        False otherwise, including for empty input.
    """
    if not src:
        return False
    # The logic here is tricky: I couldn't get a regexp to work and pass all
    # the tests, so I took a different approach: split the source by lines,
    # grab the last two and prepend '###\n' as a stand-in for whatever was in
    # the body before the last two lines.  Then, with that structure, it's
    # possible to analyze with two regexps.  Not the most elegant solution, but
    # it works.  If anyone tries to change this logic, make sure to validate
    # the whole test suite first!
    new_src = '\n'.join(['###\n'] + src.splitlines()[-2:])
    return (bool(last_two_blanks_re.match(new_src)) or
            bool(last_two_blanks_re2.match(new_src)))
238 240
239 241
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!  That is, a ``#``
    inside a string literal is treated like any other ``#`` and everything
    from it to the end of that line is dropped.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # '.' does not match a newline, so line structure is preserved.
    return re.sub(r'#.*', '', src)
256 258
257 259
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # There are strange environments for which sys.stdin.encoding is None.  We
    # ensure that a valid encoding is returned.  getattr() with a default also
    # covers a stdin object lacking the attribute altogether.
    encoding = getattr(sys.stdin, 'encoding', None)
    if encoding is None:
        encoding = 'ascii'
    return encoding
268 270
269 271 #-----------------------------------------------------------------------------
270 272 # Classes and functions for normal Python syntax handling
271 273 #-----------------------------------------------------------------------------
272 274
class InputSplitter(object):
    r"""An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.get_indent_spaces()
            prompt = '>>> ' + indent
            line = indent + input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset())
    """
    # A cache for storing the current indentation
    # The first value stores the most recently processed source input
    # The second value is the number of spaces for the current indentation
    # If self.source matches the first value, the second value is a valid
    # current indentation. Otherwise, the cache is invalid and the indentation
    # must be recalculated.
    _indent_spaces_cache = None, None
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Boolean indicating whether the current block is complete
    _is_complete = None
    # Boolean indicating whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        # Clear in place so that external references to the list stay valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._is_invalid = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def check_complete(self, source):
        """Return whether a block of code is ready to execute, or should be continued

        This is a non-stateful API, and will reset the state of this InputSplitter.

        Parameters
        ----------
        source : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            self.push(source)
        except SyntaxError:
            # Transformers in IPythonInputSplitter can raise SyntaxError,
            # which push() will not catch.
            return 'invalid', None
        else:
            if self._is_invalid:
                return 'invalid', None
            elif self.push_accepts_more():
                return 'incomplete', self.get_indent_spaces()
            else:
                return 'complete', None
        finally:
            # Runs on every exit path, so no state outlives the call.
            self.reset()

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None
        self._is_invalid = False

        # Honor termination lines properly
        if source.endswith('\\\n'):
            return False

        try:
            with warnings.catch_warnings():
                # Promote SyntaxWarning to an exception so that it lands in
                # the except clause below.
                warnings.simplefilter('error', SyntaxWarning)
                self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            self._is_complete = True
            self._is_invalid = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            #print("Not complete")  # debug
            return True

        # The user can make any (complete) input execute by leaving a blank line
        last_line = self.source.splitlines()[-1]
        if (not last_line) or last_line.isspace():
            #print("Blank line")  # debug
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.get_indent_spaces() == 0:
            if len(self.source.splitlines()) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                #print("Can't parse AST")  # debug
                return False
            else:
                # A node without a 'body' attribute is a simple (non-compound)
                # statement.
                if len(code_ast.body) == 1 and \
                        not hasattr(code_ast.body[0], 'body'):
                    #print("Simple statement")  # debug
                    return False

        # General fallback - accept more code
        return True

    def get_indent_spaces(self):
        """Return the suggested indentation (in spaces) for the next line.

        The value is cached against the current ``source`` and only
        recomputed when the source has changed.
        """
        sourcefor, n = self._indent_spaces_cache
        if sourcefor == self.source:
            return n

        # self.source always has a trailing newline
        n = find_next_indent(self.source[:-1])
        self._indent_spaces_cache = (self.source, n)
        return n

    # Backwards compatibility. I think all code that used .indent_spaces was
    # inside IPython, but we can leave this here until IPython 7 in case any
    # other modules are using it. -TK, November 2017
    indent_spaces = property(get_indent_spaces)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended.

        Parameters
        ----------
        lines : string
          Text to append.
        buffer : list, optional
          List to append to; defaults to ``self._buffer``.
        store : str, optional
          Name of the attribute that receives the joined buffer contents.
        """

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Join the buffered chunks into a single source string."""
        return u''.join(buffer)
524 526
525 527
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Create a splitter, optionally overriding the transformer lists.

        Parameters
        ----------
        line_input_checker : bool
          Passed to the default ``cellmagic`` transformer as
          ``end_on_blank_line``.
        physical_line_transforms, logical_line_transforms, python_line_transforms : list, optional
          Replacement transformer lists; when None the defaults built below
          are used.
        """
        super().__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super().reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for t in self.transforms:
            try:
                t.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Drain input held back by the transformers into ``self.source``.

        Each transformer in ``transforms_in_use`` is fed the output of the
        previous one and then reset; whatever comes out of the end of the
        chain is stored as further source.
        """
        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                  Each may be multiline, which will be passed
                  one line at a time to transform.
            """
            for out in outs:
                for line in out.splitlines():
                    # push one line at a time
                    tmp = transform.push(line)
                    if tmp is not None:
                        yield tmp

            # reset the transform
            tmp = transform.reset()
            if tmp is not None:
                yield tmp

        # Build a lazy chain of generators: nothing is pushed or reset until
        # list() consumes the outermost one.
        out = []
        for t in self.transforms_in_use:
            out = _flush(t, out)

        out = list(out)
        if out:
            self._store('\n'.join(out))

    def raw_reset(self):
        """Return raw input only and perform a full reset.
        """
        out = self.source_raw
        self.reset()
        return out

    def source_reset(self):
        """Flush the transformers, return the source and perform a full reset.
        """
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return whether more input can be accepted.

        Always True while a transformer is accumulating lines; otherwise
        defers to :meth:`InputSplitter.push_accepts_more`.
        """
        if self.transformer_accumulating:
            return True
        else:
            return super().push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)
        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        transformed_lines_list = []
        for line in lines_list:
            transformed = self._transform_line(line)
            if transformed is not None:
                transformed_lines_list.append(transformed)

        if transformed_lines_list:
            transformed_lines = '\n'.join(transformed_lines_list)
            return super().push(transformed_lines)
        else:
            # Got nothing back from transformers - they must be waiting for
            # more input.
            return False

    def _transform_line(self, line):
        """Push a line of input code through the various transformers.

        Returns any output from the transformers, or None if a transformer
        is accumulating lines.

        Sets self.transformer_accumulating as a side effect.
        """
        def _accumulating(dbg):
            #print(dbg)
            self.transformer_accumulating = True
            return None

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        # Logical-line handling is skipped while a multi-line Python statement
        # is still being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        #print("transformers clear") #debug
        self.transformer_accumulating = False
        return line
766 768
@@ -1,534 +1,536 b''
1 """Input transformer classes to support IPython special syntax.
1 """DEPRECATED: Input transformer classes to support IPython special syntax.
2
3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
2 4
3 5 This includes the machinery to recognise and transform ``%magic`` commands,
4 6 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 7 """
6 8 import abc
7 9 import functools
8 10 import re
9 11 from io import StringIO
10 12
11 13 from IPython.core.splitinput import LineInfo
12 14 from IPython.utils import tokenize2
13 15 from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError
14 16
15 17 #-----------------------------------------------------------------------------
16 18 # Globals
17 19 #-----------------------------------------------------------------------------
18 20
19 21 # The escape sequences that define the syntax transformations IPython will
20 22 # apply to user input. These can NOT be just changed here: many regular
21 23 # expressions and other parts of the code may use their hardcoded values, and
22 24 # for all intents and purposes they constitute the 'IPython syntax', so they
23 25 # should be considered fixed.
24 26
ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every escape sequence, in declaration order.
ESC_SEQUENCES = [
    ESC_SHELL, ESC_SH_CAP, ESC_HELP,
    ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
    ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
]
38 40
39 41
class InputTransformer(metaclass=abc.ABCMeta):
    """Abstract interface implemented by every line-based input transformer."""

    @abc.abstractmethod
    def push(self, line):
        """Feed one line of input to the transformer.

        Returns the transformed input, or None if the transformer is
        waiting for more input.

        Must be overridden by subclasses.

        Implementations may raise ``SyntaxError`` if the input is invalid. No
        other exceptions may be raised.
        """

    @abc.abstractmethod
    def reset(self):
        """Return, transformed, any lines the transformer has accumulated,
        and reset its internal state.

        Must be overridden by subclasses.
        """

    @classmethod
    def wrap(cls, func):
        """Decorator helper: turn ``func`` into a factory for ``cls``.

        The returned factory accepts keyword arguments only and builds
        ``cls(func, **kwargs)``; it carries ``func``'s metadata via
        ``functools.wraps``.
        """
        @functools.wraps(func)
        def factory(**kwargs):
            return cls(func, **kwargs)

        return factory
74 76
class StatelessInputTransformer(InputTransformer):
    """Adapter exposing a plain function as an input transformer.

    The wrapped function is called once per pushed line; nothing is kept
    between calls.
    """
    def __init__(self, func):
        self.func = func

    def __repr__(self):
        return "StatelessInputTransformer(func={0!r})".format(self.func)

    def push(self, line):
        """Pass ``line`` to the wrapped function and return its result."""
        transformed = self.func(line)
        return transformed

    def reset(self):
        """Do nothing; present only to satisfy the transformer interface."""
        return None
91 93
class CoroutineInputTransformer(InputTransformer):
    """Adapter exposing a generator-based coroutine as an input transformer."""
    def __init__(self, coro, **kwargs):
        self.coro = coro(**kwargs)
        # Advance to the first ``yield`` so the coroutine can accept send().
        next(self.coro)

    def __repr__(self):
        return "CoroutineInputTransformer(coro={0!r})".format(self.coro)

    def push(self, line):
        """Send ``line`` into the coroutine and return whatever it yields.

        Per the transformer interface, a None result means the transformer
        is waiting for more input.
        """
        result = self.coro.send(line)
        return result

    def reset(self):
        """Send None into the coroutine and return whatever it yields.

        Per the transformer interface, this returns any accumulated lines,
        transformed, and resets the internal state.
        """
        flushed = self.coro.send(None)
        return flushed
114 116
class TokenInputTransformer(InputTransformer):
    """Adapter exposing a token-based function as an input transformer.

    ``func`` should accept a list of tokens (5-tuples, see tokenize docs) and
    return an iterable which can be passed to tokenize.untokenize().
    """
    def __init__(self, func):
        self.func = func
        self.buf = []
        self.reset_tokenizer()

    def reset_tokenizer(self):
        """Create a fresh tokenizer reading from the current line buffer."""
        line_source = iter(self.buf)
        self.tokenizer = generate_tokens(line_source.__next__)

    def push(self, line):
        """Buffer ``line`` and tokenize as far as currently possible.

        Returns the result of ``self.output`` once tokenizing stops at a
        NEWLINE token (or at an NL token after an ERRORTOKEN has been seen),
        the buffered text when only whitespace has been buffered, or None
        when the tokenizer raises TokenError.
        """
        self.buf.append(line + '\n')
        if all(chunk.isspace() for chunk in self.buf):
            return self.reset()

        collected = []
        saw_error = False
        try:
            for tok in self.tokenizer:
                collected.append(tok)
                kind = tok[0]
                if kind == tokenize2.NEWLINE:
                    # Stop before we try to pull a line we don't have yet
                    break
                if kind == tokenize2.NL and saw_error:
                    break
                if kind == tokenize2.ERRORTOKEN:
                    saw_error = True
        except TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        return self.output(collected)

    def output(self, tokens):
        """Clear the buffer and return ``func(tokens)`` as source text."""
        self.buf.clear()
        self.reset_tokenizer()
        rebuilt = untokenize(self.func(tokens))
        return rebuilt.rstrip('\n')

    def reset(self):
        """Clear the buffer, returning its text (trailing newlines removed).

        Returns None when the buffer was empty.
        """
        pending = ''.join(self.buf)
        self.buf.clear()
        self.reset_tokenizer()
        return pending.rstrip('\n') if pending else None
164 166
class assemble_python_lines(TokenInputTransformer):
    """Token-based transformer that returns buffered lines unchanged.

    No token function is installed; once the base class has collected
    tokens, the buffered text itself is returned.
    """
    def __init__(self):
        super().__init__(None)

    def output(self, tokens):
        """Ignore ``tokens`` and return the buffered source via ``reset``."""
        return self.reset()
171 173
@CoroutineInputTransformer.wrap
def assemble_logical_lines():
    r"""Join lines following explicit line continuations (\)

    Coroutine protocol: each line sent in either yields None (the line ended
    with a backslash and is being held for joining) or yields the joined
    logical line. Empty or whitespace-only lines are yielded back unchanged.
    Sending None while lines are being held yields the parts collected so
    far, joined.
    """
    # The docstring is a raw string: a bare ``\)`` in a normal string literal
    # is an invalid escape sequence.
    line = ''
    while True:
        line = (yield line)
        if not line or line.isspace():
            continue

        parts = []
        while line is not None:
            # A trailing backslash inside a comment is not a continuation.
            if line.endswith('\\') and (not has_comment(line)):
                parts.append(line[:-1])
                line = (yield None)  # Get another line
            else:
                parts.append(line)
                break

        # Output
        line = ''.join(parts)
192 194
193 195 # Utilities
def _make_help_call(target, esc, lspace, next_input=None):
    """Build the source for a pinfo/pinfo2/psearch call.

    ``esc`` selects ``pinfo2`` when it is ``'??'``; otherwise a ``*`` in
    ``target`` selects ``psearch`` and anything else ``pinfo``. ``lspace``
    is prepended to the generated code. When ``next_input`` is given, a
    ``set_next_input`` call precedes the magic call.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = " ".join([method, target]).partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)

    if next_input is not None:
        return '%sget_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
            (lspace, next_input, t_magic_name, t_magic_arg_s)
    return '%sget_ipython().run_line_magic(%r, %r)' % (lspace, t_magic_name, t_magic_arg_s)
209 211
210 212 # These define the transformations for the different escape characters.
def _tr_system(line_info):
    "Translate lines escaped with: !"
    # Drop leading whitespace, then every leading escape character.
    stripped = line_info.line.lstrip()
    command = stripped.lstrip(ESC_SHELL)
    return '%sget_ipython().system(%r)' % (line_info.pre, command)
215 217
216 218 def _tr_system2(line_info):
217 219 "Translate lines escaped with: !!"
218 220 cmd = line_info.line.lstrip()[2:]
219 221 return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)
220 222
def _tr_help(line_info):
    "Translate lines escaped with: ?/??"
    if line_info.line[1:]:
        return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
228 230
def _tr_magic(line_info):
    "Translate lines escaped with: %"
    # Lines starting with the cell-magic escape are returned untouched.
    if line_info.line.startswith(ESC_MAGIC2):
        return line_info.line

    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    command = ' '.join([line_info.ifun, line_info.the_rest]).strip()
    magic_name, _, magic_args = command.partition(' ')
    magic_name = magic_name.lstrip(ESC_MAGIC)
    return '%sget_ipython().run_line_magic(%r, %r)' % (
        line_info.pre, magic_name, magic_args)
239 241
240 242 def _tr_quote(line_info):
241 243 "Translate lines escaped with: ,"
242 244 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
243 245 '", "'.join(line_info.the_rest.split()) )
244 246
245 247 def _tr_quote2(line_info):
246 248 "Translate lines escaped with: ;"
247 249 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
248 250 line_info.the_rest)
249 251
250 252 def _tr_paren(line_info):
251 253 "Translate lines escaped with: /"
252 254 return '%s%s(%s)' % (line_info.pre, line_info.ifun,
253 255 ", ".join(line_info.the_rest.split()))
254 256
# Dispatch table: escape sequence -> function translating a LineInfo.
# ESC_MAGIC2 has no entry here.
tr = {
    ESC_SHELL: _tr_system,
    ESC_SH_CAP: _tr_system2,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
263 265
@StatelessInputTransformer.wrap
def escaped_commands(line):
    """Transform escaped commands - %magic, !system, ?help + various autocalls.

    Empty or whitespace-only lines, and lines whose escape has no entry in
    ``tr``, are returned unchanged.
    """
    if not line or line.isspace():
        return line

    info = LineInfo(line)
    if info.esc in tr:
        return tr[info.esc](info)
    return line
275 277
276 278 _initial_space_re = re.compile(r'\s*')
277 279
278 280 _help_end_re = re.compile(r"""(%{0,2}
279 281 [a-zA-Z_*][\w*]* # Variable name
280 282 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
281 283 )
282 284 (\?\??)$ # ? or ??
283 285 """,
284 286 re.VERBOSE)
285 287
286 288 # Extra pseudotokens for multiline strings and data structures
287 289 _MULTILINE_STRING = object()
288 290 _MULTILINE_STRUCTURE = object()
289 291
def _line_tokens(line):
    """Helper for has_comment and ends_in_comment_or_string.

    Returns the set of token types found while tokenizing ``line``. If the
    tokenizer raises TokenError, a pseudotoken is added instead:
    ``_MULTILINE_STRING`` when the error message mentions a multi-line
    string, ``_MULTILINE_STRUCTURE`` otherwise.
    """
    seen = set()
    source = StringIO(line)
    try:
        for tok in generate_tokens(source.readline):
            seen.add(tok[0])
    except TokenError as err:
        # There are only two cases where a TokenError is raised.
        if 'multi-line string' in err.args[0]:
            marker = _MULTILINE_STRING
        else:
            marker = _MULTILINE_STRUCTURE
        seen.add(marker)
    return seen
304 306
def has_comment(src):
    """Tell whether an input line contains a comment token.

    Tokenizing the line lets real comments be told apart from a ``#`` that
    sits inside a string.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source has a comment.
    """
    token_types = _line_tokens(src)
    return tokenize2.COMMENT in token_types
321 323
def ends_in_comment_or_string(src):
    """Tell whether an input line has a comment or an unfinished multiline
    string.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source ends in a comment or multiline string.
    """
    token_types = _line_tokens(src)
    if tokenize2.COMMENT in token_types:
        return True
    return _MULTILINE_STRING in token_types
338 340
339 341
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end

    Lines that do not match the help pattern, or that end in a comment or
    multiline string, are returned unchanged.
    """
    match = _help_end_re.search(line)
    if match is None or ends_in_comment_or_string(line):
        return line

    indent = _initial_space_re.match(line).group(0)

    # If we're mid-command, put it back on the next prompt for the user.
    if line.strip() == match.group(0):
        next_input = None
    else:
        next_input = line.rstrip('?')

    return _make_help_call(match.group(1), match.group(3), indent, next_input)
354 356
355 357
@CoroutineInputTransformer.wrap
def cellmagic(end_on_blank_line=False):
    """Captures & transforms cell magics.

    After a cell magic is started, this stores up any lines it gets until it is
    reset (sent None). If ``end_on_blank_line`` is true, a blank line also
    ends the cell.

    Cells whose first non-empty line does not start with ``%%`` are passed
    through line by line until reset.
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    # Raw string: ``\w`` and ``\?`` are invalid escapes in a normal literal.
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        # consume leading empty lines
        while not line:
            line = (yield line)

        if not line.startswith(ESC_MAGIC2):
            # This isn't a cell magic, idle waiting for reset then start over
            while line is not None:
                line = (yield line)
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        first = line
        body = []
        line = (yield None)
        # Keep collecting until reset, or until a blank line if requested.
        while (line is not None) and \
                ((line.strip() != '') or not end_on_blank_line):
            body.append(line)
            line = (yield None)

        # Output
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
394 396
395 397
396 398 def _strip_prompts(prompt_re, initial_re=None, turnoff_re=None):
397 399 """Remove matching input prompts from a block of input.
398 400
399 401 Parameters
400 402 ----------
401 403 prompt_re : regular expression
402 404 A regular expression matching any input prompt (including continuation)
403 405 initial_re : regular expression, optional
404 406 A regular expression matching only the initial prompt, but not continuation.
405 407 If no initial expression is given, prompt_re will be used everywhere.
406 408 Used mainly for plain Python prompts, where the continuation prompt
407 409 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
408 410
409 411 If initial_re and prompt_re differ,
410 412 only initial_re will be tested against the first line.
411 413 If any prompt is found on the first two lines,
412 414 prompts will be stripped from the rest of the block.
413 415 """
414 416 if initial_re is None:
415 417 initial_re = prompt_re
416 418 line = ''
417 419 while True:
418 420 line = (yield line)
419 421
420 422 # First line of cell
421 423 if line is None:
422 424 continue
423 425 out, n1 = initial_re.subn('', line, count=1)
424 426 if turnoff_re and not n1:
425 427 if turnoff_re.match(line):
426 428 # We're in e.g. a cell magic; disable this transformer for
427 429 # the rest of the cell.
428 430 while line is not None:
429 431 line = (yield line)
430 432 continue
431 433
432 434 line = (yield out)
433 435
434 436 if line is None:
435 437 continue
436 438 # check for any prompt on the second line of the cell,
437 439 # because people often copy from just after the first prompt,
438 440 # so we might not see it in the first line.
439 441 out, n2 = prompt_re.subn('', line, count=1)
440 442 line = (yield out)
441 443
442 444 if n1 or n2:
443 445 # Found a prompt in the first two lines - check for it in
444 446 # the rest of the cell as well.
445 447 while line is not None:
446 448 line = (yield prompt_re.sub('', line, count=1))
447 449
448 450 else:
449 451 # Prompts not in input - wait for reset
450 452 while line is not None:
451 453 line = (yield line)
452 454
@CoroutineInputTransformer.wrap
def classic_prompt():
    """Strip the >>>/... prompts of the Python interactive shell."""
    # FIXME: non-capturing version (?:...) usable?
    return _strip_prompts(
        re.compile(r'^(>>>|\.\.\.)( |$)'),
        # Only '>>>' is accepted as a prompt on the first line.
        re.compile(r'^>>>( |$)'),
        # Any %magic/!system is IPython syntax, so we needn't look for >>> prompts
        re.compile(r'^[%!]'),
    )
462 464
@CoroutineInputTransformer.wrap
def ipy_prompt():
    """Strip IPython's In [1]:/...: prompts."""
    # FIXME: non-capturing version (?:...) usable?
    any_prompt = re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)')
    # Disable prompt stripping inside cell magics
    cell_magic_start = re.compile(r'^%%')
    return _strip_prompts(any_prompt, turnoff_re=cell_magic_start)
471 473
472 474
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that starts with it, until the
    transformer is reset.
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)

        if line is None:
            continue

        found = space_re.match(line)
        # With no leading whitespace the prefix is empty, so every line below
        # "starts with" it and is passed through unchanged until reset.
        prefix = found.group(0) if found else ''
        width = len(prefix)
        while line is not None:
            if line.startswith(prefix):
                line = line[width:]
            line = (yield line)
499 501
500 502
501 503 _assign_pat = \
502 504 r'''(?P<lhs>(\s*)
503 505 ([\w\.]+) # Initial identifier
504 506 (\s*,\s*
505 507 \*?[\w\.]+)* # Further identifiers for unpacking
506 508 \s*?,? # Trailing comma
507 509 )
508 510 \s*=\s*
509 511 '''
510 512
# Assignment whose right-hand side starts with '!'; group ``cmd`` is the
# text after the '!' and any whitespace following it.
assign_system_re = re.compile(r'{}!\s*(?P<cmd>.*)'.format(_assign_pat), re.VERBOSE)
assign_system_template = '%s = get_ipython().getoutput(%r)'
@StatelessInputTransformer.wrap
def assign_from_system(line):
    """Transform assignment from system commands (e.g. files = !ls)

    Lines that are not such an assignment are returned unchanged.
    """
    match = assign_system_re.match(line)
    if match is not None:
        return assign_system_template % match.group('lhs', 'cmd')
    return line
521 523
# Assignment whose right-hand side starts with '%'; group ``cmd`` is the
# text after the '%' and any whitespace following it.
assign_magic_re = re.compile(r'{}%\s*(?P<cmd>.*)'.format(_assign_pat), re.VERBOSE)
assign_magic_template = '%s = get_ipython().run_line_magic(%r, %r)'
@StatelessInputTransformer.wrap
def assign_from_magic(line):
    """Transform assignment from magic commands (e.g. a = %who_ls)

    Lines that are not such an assignment are returned unchanged.
    """
    match = assign_magic_re.match(line)
    if match is None:
        return line

    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    target, command = match.group('lhs', 'cmd')
    magic_name, _, magic_args = command.partition(' ')
    magic_name = magic_name.lstrip(ESC_MAGIC)
    return assign_magic_template % (target, magic_name, magic_args)
@@ -1,525 +1,534 b''
1 """Input transformer machinery to support IPython special syntax.
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 """
6
7 # Copyright (c) IPython Development Team.
8 # Distributed under the terms of the Modified BSD License.
9
1 10 from codeop import compile_command
2 11 import re
3 12 from typing import List, Tuple
4 13 from IPython.utils import tokenize2
5 14 from IPython.utils.tokenutil import generate_tokens
6 15
_indent_re = re.compile(r'^[ \t]+')

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that starts with it. Lines that do not
    start with that whitespace are left unchanged.

    An empty list of lines is returned as-is.
    """
    # Guard the lines[0] access below against an empty cell.
    if not lines:
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
22 31
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Return ``lines`` with prompts stripped, if a prompt is detected.

        An empty list of lines is returned as-is.
        """
        # Guard the lines[0] access below against an empty cell.
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
53 62
# Strips the >>> / ... prompts; only >>> is tested against the first line.
classic_prompt = PromptStripper(
    initial_re=re.compile(r'^>>>( |$)'),
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
)

# Strips the "In [N]: " and "...: " prompts.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
60 69
def cell_magic(lines):
    """Turn a cell starting with ``%%name`` into a ``run_cell_magic`` call.

    Returns ``lines`` unchanged when the cell is empty, does not start with
    ``%%``, or starts with a ``%%name?`` help request. Otherwise returns a
    single-element list holding the generated call: the magic name, the rest
    of the first line (without its final character) and the remaining lines
    joined together are passed as its three arguments.
    """
    # Guard against an empty cell before looking at lines[0].
    if not lines or not lines[0].startswith('%%'):
        return lines
    # Raw string: ``\w`` and ``\?`` are invalid escapes in a normal literal.
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # [2:-1] drops the leading '%%' and the final character of the line.
    magic_name, _, first_line = lines[0][2:-1].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
71 80
72 81 # -----
73 82
74 83 def _find_assign_op(token_line):
75 84 # Find the first assignment in the line ('=' not inside brackets)
76 85 # We don't try to support multiple special assignment (a = b = %foo)
77 86 paren_level = 0
78 87 for i, ti in enumerate(token_line):
79 88 s = ti.string
80 89 if s == '=' and paren_level == 0:
81 90 return i
82 91 if s in '([{':
83 92 paren_level += 1
84 93 elif s in ')]}':
85 94 paren_level -= 1
86 95
def find_end_of_continued_line(lines, start_line: int):
    """Locate the end of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers. Starting at ``start_line``, advances while
    the current line ends with a backslash followed by a newline. If the
    very last line also ends that way, the returned index equals
    ``len(lines)``.
    """
    idx = start_line
    while True:
        if not lines[idx].endswith('\\\n'):
            return idx
        idx += 1
        if idx >= len(lines):
            return idx
98 107
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble pieces of a continued line into a single line.

    Uses 0-indexed line numbers. *start* is (lineno, colno). The first piece
    begins at ``colno``; pieces are joined with a single space.
    """
    first_row, first_col = start[0], start[1]
    pieces = [lines[first_row][first_col:]]
    pieces.extend(lines[first_row + 1:end_line + 1])

    # Strip backslash+newline from every piece but the last ...
    cleaned = [piece[:-2] for piece in pieces[:-1]]
    # ... and just the newline from the last one.
    cleaned.append(pieces[-1][:-1])
    return ' '.join(cleaned)
107 116
class TokenTransformBase:
    """Base class for transformations located at a token position.

    ``start`` is a (line, column) pair with a 1-indexed line; it is stored
    as the 0-indexed ``start_line`` and ``start_col``.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        """Return the sort key: (start_line, start_col, priority)."""
        return (self.start_line, self.start_col, self.priority)

    def __init__(self, start):
        line_1_indexed = start[0]
        self.start_line = line_1_indexed - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    def transform(self, lines: List[str]):
        """Apply the transformation to ``lines``; subclasses must override."""
        raise NotImplementedError
121 130
class MagicAssign(TokenTransformBase):
    """Transformation for an assignment from a line magic (``a = %foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected, so the line must hold
            # at least assign_ix + 3 tokens; a shorter line would otherwise
            # raise IndexError on the last check.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        Returns a new list of lines in which the assignment (including any
        backslash-continued lines) is replaced by one line calling
        ``get_ipython().run_line_magic``.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
153 162
154 163
class SystemAssign(TokenTransformBase):
    """Transformation for an assignment from a system command (``a = !foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` token if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None or len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize2.ERRORTOKEN:
                continue

            # Walk the run of error tokens after '=': whitespace is skipped,
            # '!' is a hit, anything else ends the search on this line.
            ix = assign_ix + 1
            while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
                text = line[ix].string
                if text == '!':
                    return cls(line[ix].start)
                if not text.isspace():
                    break
                ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        Returns a new list of lines in which the assignment (including any
        backslash-continued lines) is replaced by one line calling
        ``get_ipython().getoutput``.
        """
        first, col = self.start_line, self.start_col

        target = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        rhs = assemble_continued_line(lines, (first, col), last)
        assert rhs.startswith('!'), rhs

        call = "get_ipython().getoutput({!r})".format(rhs[1:])
        replacement = target + call + '\n'

        return lines[:first] + [replacement] + lines[last + 1:]
193 202
194 203 # The escape sequences that define the syntax transformations IPython will
195 204 # apply to user input. These can NOT be just changed here: many regular
196 205 # expressions and other parts of the code may use their hardcoded values, and
197 206 # for all intents and purposes they constitute the 'IPython syntax', so they
198 207 # should be considered fixed.
199 208
ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Single-character escapes, and the doubled forms recognised as escapes.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
212 221
def _make_help_call(target, esc, next_input=None):
    """Build the source for a pinfo/pinfo2/psearch call.

    ``esc`` selects ``pinfo2`` when it is ``'??'``; otherwise a ``*`` in
    ``target`` selects ``psearch`` and anything else ``pinfo``. When
    ``next_input`` is given, a ``set_next_input`` call precedes the magic
    call.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = " ".join([method, target]).partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)

    if next_input is not None:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
            (next_input, t_magic_name, t_magic_arg_s)
    return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
228 237
def _tr_help(content):
    "Translate lines escaped with: ?"
    if content:
        return _make_help_call(content, '?')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
236 245
def _tr_help2(content):
    "Translate lines escaped with: ??"
    if content:
        return _make_help_call(content, '??')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
244 253
245 254 def _tr_magic(content):
246 255 "Translate lines escaped with: %"
247 256 name, _, args = content.partition(' ')
248 257 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
249 258
250 259 def _tr_quote(content):
251 260 "Translate lines escaped with: ,"
252 261 name, _, args = content.partition(' ')
253 262 return '%s("%s")' % (name, '", "'.join(args.split()) )
254 263
255 264 def _tr_quote2(content):
256 265 "Translate lines escaped with: ;"
257 266 name, _, args = content.partition(' ')
258 267 return '%s("%s")' % (name, args)
259 268
260 269 def _tr_paren(content):
261 270 "Translate lines escaped with: /"
262 271 name, _, args = content.partition(' ')
263 272 return '%s(%s)' % (name, ", ".join(args.split()))
264 273
# Dispatch table: escape sequence -> callable turning the text after the
# escape into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
273 282
class EscapedCommand(TokenTransformBase):
    """Transformation for a line starting with an escape (%foo, !foo, ...)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance positioned at the escape token if found, or None.
        Token lines that are empty, or that contain only INDENT/DEDENT
        tokens, are skipped.
        """
        for line in tokens_by_line:
            ix = 0
            # Bounds-check the scan: without it, an empty token line or one
            # made up solely of INDENT/DEDENT tokens raises IndexError.
            while ix < len(line) \
                    and line[ix].type in {tokenize2.INDENT, tokenize2.DEDENT}:
                ix += 1
            if ix < len(line) and line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Replace the escaped command with the equivalent Python call.

        Returns a new list of lines in which the command (including any
        backslash-continued lines) is replaced by one generated line that
        keeps the original indentation.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Prefer the two-character escapes (!!, ??) over their single forms.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
306 315
307 316 _help_end_re = re.compile(r"""(%{0,2}
308 317 [a-zA-Z_*][\w*]* # Variable name
309 318 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
310 319 )
311 320 (\?\??)$ # ? or ??
312 321 """,
313 322 re.VERBOSE)
314 323
class HelpEnd(TokenTransformBase):
    """Transformation for a line ending in ``?`` (help request)."""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        q_line_1_indexed = q_locn[0]
        self.q_line = q_line_1_indexed - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first token line whose last-but-one token is ``?``.

        Returns an instance recording the position of the line's first
        token that is not INDENT/DEDENT and the position of the ``?``
        token, or None if no such line exists.
        """
        skipped = {tokenize2.INDENT, tokenize2.DEDENT}
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            # Find the first token that's not INDENT/DEDENT
            ix = 0
            while line[ix].type in skipped:
                ix += 1
            return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Replace the help request with the equivalent Python call.

        Returns a new list of lines in which the lines from the start of the
        request through the line holding the ``?`` are replaced by one
        generated line that keeps the original indentation.
        """
        first, last = self.start_line, self.q_line
        piece = ''.join(lines[first:last + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]
        lines_before = lines[:first]
        lines_after = lines[last + 1:]

        m = _help_end_re.search(content)
        assert m is not None, content
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_whole_cell = (not lines_before) and (not lines_after)
        if is_whole_cell and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return lines_before + [indent + call + '\n'] + lines_after
357 366
def make_tokens_by_line(lines):
    """Tokenize *lines* and group the tokens by logical line.

    A new group is started after every NEWLINE token, so the tokens of a
    multi-line expression or string stay together in one group. The final
    group holds whatever follows the last NEWLINE token.

    Parameters
    ----------
    lines : list of str
        Source lines, fed one at a time to the tokenizer.

    Returns
    -------
    A list of lists of tokens.
    """
    grouped = []
    current = []
    for tok in generate_tokens(iter(lines).__next__):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            # The logical line is finished; start collecting the next one.
            grouped.append(current)
            current = []
    grouped.append(current)
    return grouped
366 375
def show_linewise_tokens(s: str):
    """For investigation: print the tokens of *s*, one logical line at a time.

    A trailing newline is appended to *s* if it does not already end with one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
376 385
class TransformerManager:
    """Apply the IPython input transformations to a cell of code.

    Three groups of transformations are kept as instance attributes:

    * ``cleanup_transforms`` and ``line_transforms`` are callables which take
      a list of lines and return a new list of lines.
    * ``token_transformers`` are classes with a ``find(tokens_by_line)``
      classmethod which returns a transformer instance, or a false value if
      there is nothing for that class to transform.
    """

    def __init__(self):
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        transformer = min(candidates, key=TokenTransformBase.sortby)
        return True, transformer.transform(lines)

    def do_token_transforms(self, lines):
        """Apply token-based transforms until none of them finds anything.

        Returns the transformed list of lines.
        """
        while True:
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

    def transform_cell(self, cell: str):
        """Transform a cell of input code and return it as a single string.

        The cleanup transforms run first, then the line transforms, then the
        token-based transforms.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        if tokens_by_line[-1][-1].type != tokenize2.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        # Input with no complete logical line (blank or comment-only) yields
        # a single token group, so there is no previous line to inspect.
        if len(tokens_by_line) > 1:
            # Find the last token on the previous line that's not NEWLINE or
            # COMMENT
            toks_last_line = tokens_by_line[-2]
            ix = len(toks_last_line) - 1
            while ix >= 0 and toks_last_line[ix].type in {tokenize2.NEWLINE,
                                                          tokenize2.COMMENT}:
                ix -= 1

            if ix >= 0 and toks_last_line[ix].string == ':':
                # The last line starts a block (e.g. 'if foo:')
                ix = 0
                while toks_last_line[ix].type in {tokenize2.INDENT,
                                                  tokenize2.DEDENT}:
                    ix += 1
                indent = toks_last_line[ix].start[1]
                return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.

        try:
            res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)
        return 'complete', None
519 528
520 529
def find_last_indent(lines):
    """Return the width of the leading indentation of the last line.

    The indentation is whatever ``_indent_re`` matches at the start of
    ``lines[-1]``, with each tab counted as four spaces. Returns 0 if the
    pattern does not match.
    """
    match = _indent_re.match(lines[-1])
    if match:
        expanded = match.group(0).replace('\t', ' ' * 4)
        return len(expanded)
    return 0
General Comments 0
You need to be logged in to leave comments. Login now