##// END OF EJS Templates
run formatter
Matthias Bussonnier -
Show More
@@ -1,782 +1,787
1 1 """DEPRECATED: Input handling and transformation machinery.
2 2
3 3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
4 4
5 5 The first class in this module, :class:`InputSplitter`, is designed to tell when
6 6 input from a line-oriented frontend is complete and should be executed, and when
7 7 the user should be prompted for another line of code instead. The name 'input
8 8 splitter' is largely for historical reasons.
9 9
10 10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
11 11 with full support for the extended IPython syntax (magics, system calls, etc).
12 12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
13 13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
14 14 and stores the results.
15 15
16 16 For more details, see the class docstrings below.
17 17 """
18 18
19 19 from warnings import warn
20 20
# Emit the deprecation notice at import time.  stacklevel=2 attributes the
# warning to the code importing this module rather than to this module itself,
# so the report points at the import that needs to be migrated.
warn(
    'IPython.core.inputsplitter is deprecated since IPython 7 in favor of `IPython.core.inputtransformer2`',
    DeprecationWarning,
    stacklevel=2,
)
23 23
24 24 # Copyright (c) IPython Development Team.
25 25 # Distributed under the terms of the Modified BSD License.
26 26 import ast
27 27 import codeop
28 28 import io
29 29 import re
30 30 import sys
31 31 import tokenize
32 32 import warnings
33 33
34 34 from typing import List
35 35
36 36 from IPython.core.inputtransformer import (leading_indent,
37 37 classic_prompt,
38 38 ipy_prompt,
39 39 cellmagic,
40 40 assemble_logical_lines,
41 41 help_end,
42 42 escaped_commands,
43 43 assign_from_magic,
44 44 assign_from_system,
45 45 assemble_python_lines,
46 46 )
47 47 from IPython.utils import tokenutil
48 48
49 49 # These are available in this module for backwards compatibility.
50 50 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
51 51 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
52 52 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
53 53
54 54 #-----------------------------------------------------------------------------
55 55 # Utilities
56 56 #-----------------------------------------------------------------------------
57 57
58 58 # FIXME: These are general-purpose utilities that later can be moved to the
59 59 # general ward. Kept here for now because we're being very strict about test
60 60 # coverage with this code, and this lets us ensure that we keep 100% coverage
61 61 # while developing.
62 62
63 63 # compiled regexps for autoindent management
# Alternation of patterns matching indented statements that normally close a
# block (raise / return / pass / break / continue).
dedent_re = re.compile(
    '|'.join(
        (
            r'^\s+raise(\s.*)?$',  # raise, optionally followed by space + anything
            r'^\s+raise\([^\)]*\).*$',  # raise immediately followed by an open paren
            r'^\s+return(\s.*)?$',  # return, optionally followed by space + anything
            r'^\s+return\([^\)]*\).*$',  # return immediately followed by an open paren
            r'^\s+pass\s*$',  # pass, trailing whitespace allowed
            r'^\s+break\s*$',  # break, trailing whitespace allowed
            r'^\s+continue\s*$',  # continue, trailing whitespace allowed
        )
    )
)

# Leading run of whitespace characters (space, tab, CR, form feed, vertical tab).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# Matches lines holding nothing but a comment, so that 'if 1:' is not
# accidentally inserted before pure comments.
comment_line_re = re.compile(r'^\s*\#')
78 78
79 79
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    The count is the length of the prefix matched by ``ini_spaces_re``, so a
    tab counts as a single character.  Mixing tabs and spaces in the user's
    input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, or 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
100 100
101 101 # Fake token types for partial_tokenize:
# Token type numbers placed just past the range used by the tokenize module.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1


# The two classes below stand in for TokenInfo objects, but don't try to look
# up a token type name that they won't find.
class IncompleteString:
    """Token-like object tagged with the INCOMPLETE_STRING type."""

    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s, self.start, self.end, self.line = s, start, end, line


class InMultilineStatement:
    """Token-like object tagged with the IN_MULTILINE_STATEMENT type."""

    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # Zero-width token: it carries no text and starts and ends at `pos`.
        self.s = ''
        self.start = pos
        self.end = pos
        self.line = line
121 121
def partial_tokens(s):
    """Yield tokens from a string of code that may be incomplete.

    Ordinary tokens are passed through unchanged.  If tokenizing stops with a
    ``tokenize.TokenError``, one final pseudo-token is yielded instead:
    an ``IncompleteString`` when the error message mentions a multi-line
    string, or an ``InMultilineStatement`` when it mentions a multi-line
    statement.  Any other ``TokenError`` is re-raised.
    """
    line_reader = io.StringIO(s).readline
    # Placeholder so that `token.end` is defined in the error handler even if
    # tokenizing fails before producing a single token.
    token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for token in tokenutil.generate_tokens_catch_errors(line_reader):
            yield token
    except tokenize.TokenError as err:
        # End of input was reached in the middle of a construct.
        lines = s.splitlines(keepends=True)
        end = len(lines), len(lines[-1])
        message = err.args[0]
        if 'multi-line string' in message:
            # The unfinished string starts where the last good token ended.
            row, col = start = token.end
            remainder = lines[row - 1][col:] + ''.join(lines[row:])
            yield IncompleteString(remainder, start, end, lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(end, lines[-1])
        else:
            raise
146 146
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : str
        The source entered so far; it may be incomplete.

    Returns
    -------
    int
        Suggested indentation, in spaces, for the next line.  0 is returned
        when no meaningful tokens remain to base a suggestion on, or when the
        last token is an INCOMPLETE_STRING (inside a multiline string).
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Drop trailing tokens that say nothing about the next indentation level.
    # The `tokens and` guard keeps input made up solely of such tokens from
    # indexing into an empty list.
    ignorable = {
        tokenize.DEDENT,
        tokenize.NEWLINE,
        tokenize.COMMENT,
        tokenize.ERRORTOKEN,
    }
    while tokens and tokens[-1].type in ignorable:
        tokens.pop()
    if not tokens:
        return 0

    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
    # of input. We need to remove those if we're in a multiline statement
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        while len(tokens) > 1 and tokens[-2].type == tokenize.NL:
            tokens.pop(-2)

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        # Record an indentation level only when it differs from the last one.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # After a non-logical newline, the column of the following token
            # is taken as the indentation in use.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        opening_brackets = {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}
        if len(tokens) > 1 and tokens[-2].exact_type in opening_brackets:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
214 219
215 220
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A blank is either an empty line or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether the final line (as produced
        by ``str.splitlines``) is empty or whitespace-only.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
229 234
230 235
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether either regexp matches the
        normalized tail of the source.
    """
    if not src:
        return False
    # A single regexp applied to the raw source did not pass all the tests,
    # so the input is normalized first: only the last two lines are kept and
    # '###\n' is prepended as a stand-in for whatever came before them.  That
    # fixed shape can then be analyzed with the two regexps above.  Validate
    # the whole test suite before changing this logic.
    tail = src.splitlines()[-2:]
    new_src = '\n'.join(['###\n'] + tail)
    patterns = (last_two_blanks_re, last_two_blanks_re2)
    return any(pattern.match(new_src) is not None for pattern in patterns)
255 260
256 261
def remove_comments(src):
    """Strip everything from each ``#`` to the end of its line.

    Note: comments are NOT recognized inside of strings, so a ``#`` within a
    string literal is treated as the start of a comment as well.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = '#.*'
    return re.sub(comment_pattern, '', src)
273 278
274 279
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # Some environments expose a sys.stdin without an `encoding` attribute,
    # or with it set to None; fall back to a valid encoding in both cases.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
285 290
286 291 #-----------------------------------------------------------------------------
287 292 # Classes and functions for normal Python syntax handling
288 293 #-----------------------------------------------------------------------------
289 294
class InputSplitter:
    r"""An object that can accumulate lines of Python source before execution.

    Feed it Python source line-by-line through :meth:`push`; each call reports
    whether the code pushed so far could already be executed.  The companion
    method :meth:`push_accepts_more` tells whether more input can be pushed
    into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset())
    """

    # Cache for the current indentation, as a (source, n_spaces) pair.  The
    # second value is only valid while the first equals self.source; otherwise
    # the indentation must be recalculated.
    _indent_spaces_cache = None, None
    # Default input encoding.  Set from get_input_encoding() at initialization
    # time; a client with specific knowledge of the encoding may overwrite it.
    encoding = ''
    # Full source pushed so far.  Reading this attribute is the normal way of
    # querying the currently pushed source code.
    source = ''
    # Code object for the current source, kept in sync with it by push(); it
    # is None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # Lines of input accumulated so far
    _buffer: List[str]
    # Command compiler
    _compile: codeop.CommandCompiler
    # Whether the current block is complete
    _is_complete = None
    # Whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self) -> None:
        """Create a new InputSplitter instance."""
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        # Emptied in place so the same list object stays attached.
        self._buffer.clear()
        self.source = ''
        self.code = None
        self._is_complete = False
        self._is_invalid = False

    def source_reset(self):
        """Return the input source and perform a full reset."""
        accumulated = self.source
        self.reset()
        return accumulated

    def check_complete(self, source):
        """Return whether a block of code is ready to execute, or should be continued

        This is a non-stateful API, and will reset the state of this InputSplitter.

        Parameters
        ----------
        source : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            try:
                self.push(source)
            except SyntaxError:
                # Transformers in IPythonInputSplitter can raise SyntaxError,
                # which push() will not catch.
                return 'invalid', None
            if self._is_invalid:
                return 'invalid', None
            if self.push_accepts_more():
                return 'incomplete', self.get_indent_spaces()
            return 'complete', None
        finally:
            # Leave no state behind, whatever the outcome.
            self.reset()

    def push(self, lines: str) -> bool:
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block.  Note that
            this value is also stored as a private attribute (``_is_complete``), so it
            can be queried at any time.
        """
        assert isinstance(lines, str)
        self._store(lines)
        pending = self.source

        # Clear the previous result before compiling, so that a failure can't
        # leave the code and source attributes inconsistent with each other.
        self.code = None
        self._is_complete = None
        self._is_invalid = False

        # A trailing backslash-newline means the statement continues.
        if pending.endswith('\\\n'):
            return False

        # Invalid syntax can surface as any of a number of different errors
        # from inside the compiler, so all of them are caught.  Such input is
        # flagged as a 'ready' block, so the invalid Python can be sent on for
        # evaluation with possible ipython special-syntax conversion.
        compile_failures = (
            SyntaxError,
            OverflowError,
            ValueError,
            TypeError,
            MemoryError,
            SyntaxWarning,
        )
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                compiled = self._compile(pending, symbol="exec")
        except compile_failures:
            self._is_complete = True
            self._is_invalid = True
        else:
            # No exception was raised, though the compiler may still not have
            # produced a complete code object.
            self.code = compiled
            self._is_complete = compiled is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        Input that produced a syntax error is marked complete by push(), and
        this method does *not* raise the syntax error exception: typically
        clients will want to send invalid syntax to an execution backend which
        might convert the invalid syntax into valid Python via one of the
        dynamic IPython mechanisms.
        """
        # With incomplete input, unconditionally accept more.
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        source_lines = self.source.splitlines()

        # The user can make any (complete) input execute by leaving a blank line
        final_line = source_lines[-1]
        if not final_line or final_line.isspace():
            return False

        # Indented position: general fallback, accept more code.
        if self.get_indent_spaces() != 0:
            return True

        # Flush left with just a single line or a single AST node, as is the
        # case after a simple statement such as 'a=1': execute straight away.
        if len(source_lines) <= 1:
            return False

        try:
            tree = ast.parse("".join(self._buffer))
        except Exception:
            return False

        statements = tree.body
        if len(statements) == 1 and not hasattr(statements[0], 'body'):
            return False

        # General fallback - accept more code
        return True

    def get_indent_spaces(self):
        """Return the indentation for the next line, cached per source."""
        cached_source, cached_indent = self._indent_spaces_cache
        if cached_source == self.source:
            return cached_indent

        # self.source always has a trailing newline
        indent = find_next_indent(self.source[:-1])
        self._indent_spaces_cache = (self.source, indent)
        return indent

    # Backwards-compatible attribute-style access to get_indent_spaces(), in
    # case any other modules are using .indent_spaces.
    indent_spaces = property(get_indent_spaces)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""
        target = self._buffer if buffer is None else buffer
        target.append(lines if lines.endswith('\n') else lines + '\n')
        setattr(self, store, self._set_source(target))

    def _set_source(self, buffer):
        """Join the buffered chunks into a single source string."""
        return ''.join(buffer)
541 546
542 547
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Set while a transformer is holding on to input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Set while assemble_python_lines is holding on to input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Set up the three transformer stages, using defaults where None is given."""
        super().__init__()
        self._buffer_raw = []
        self._validate = True

        # Default transformers are only instantiated when the caller did not
        # supply a list of their own.
        self.physical_line_transforms = (
            physical_line_transforms
            if physical_line_transforms is not None
            else [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]
        )

        self.assemble_logical_lines = assemble_logical_lines()
        self.logical_line_transforms = (
            logical_line_transforms
            if logical_line_transforms is not None
            else [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]
        )

        self.assemble_python_lines = assemble_python_lines()
        # No python-line transformers are used by default at present.
        self.python_line_transforms = (
            python_line_transforms if python_line_transforms is not None else []
        )

    @property
    def transforms(self):
        "Quick access to all transformers."
        return (
            self.physical_line_transforms
            + [self.assemble_logical_lines]
            + self.logical_line_transforms
            + [self.assemble_python_lines]
            + self.python_line_transforms
        )

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        active = self.physical_line_transforms[:]
        if not self.within_python_line:
            active += [self.assemble_logical_lines] + self.logical_line_transforms
        return active + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super().reset()
        self._buffer_raw.clear()
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for transformer in self.transforms:
            try:
                transformer.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Drain every transformer in use and store whatever they still held."""

        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                  Each may be multiline, which will be passed
                  one line at a time to transform.
            """
            for chunk in outs:
                # push one line at a time
                for piece in chunk.splitlines():
                    pushed = transform.push(piece)
                    if pushed is not None:
                        yield pushed

            # Resetting the transform releases anything it was still holding.
            leftover = transform.reset()
            if leftover is not None:
                yield leftover

        # Chain the transformers lazily: each stage consumes the output of the
        # one before it.
        pipeline = []
        for transformer in self.transforms_in_use:
            pipeline = _flush(transformer, pipeline)

        flushed = list(pipeline)
        if flushed:
            self._store('\n'.join(flushed))

    def raw_reset(self):
        """Return raw input only and perform a full reset."""
        raw = self.source_raw
        self.reset()
        return raw

    def source_reset(self):
        """Flush the transformers, return the source and perform a full reset."""
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Accept more input while a transformer is accumulating; else defer to the base class."""
        if self.transformer_accumulating:
            return True
        return super().push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input."""
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines: str) -> bool:
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block.  Note that
            this value is also stored as a private attribute (_is_complete), so it
            can be queried at any time.
        """
        assert isinstance(lines, str)
        # ''.splitlines() --> [], but the empty line still has to be pushed to
        # the transformers.
        raw_lines = lines.splitlines() or ['']

        # Keep the raw source as given, before any transformation is applied.
        self._store(lines, self._buffer_raw, 'source_raw')

        outputs = (self._transform_line(raw) for raw in raw_lines)
        transformed = [text for text in outputs if text is not None]

        if not transformed:
            # Got nothing back from transformers - they must be waiting for
            # more input.
            return False
        return super().push('\n'.join(transformed))

    def _transform_line(self, line):
        """Push a line of input code through the various transformers.

        Returns any output from the transformers, or None if a transformer
        is accumulating lines.

        Sets self.transformer_accumulating as a side effect.
        """

        def _accumulating(dbg):
            # `dbg` names the stage that is holding input; debugging aid only.
            self.transformer_accumulating = True
            return None

        for stage in self.physical_line_transforms:
            line = stage.push(line)
            if line is None:
                return _accumulating(stage)

        # Logical-line processing is skipped while a Python line is still
        # being assembled.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for stage in self.logical_line_transforms:
                line = stage.push(line)
                if line is None:
                    return _accumulating(stage)

        line = self.assemble_python_lines.push(line)
        self.within_python_line = line is None
        if line is None:
            return _accumulating('acc python line')

        for stage in self.python_line_transforms:
            line = stage.push(line)
            if line is None:
                return _accumulating(stage)

        self.transformer_accumulating = False
        return line
782 787
@@ -1,823 +1,827
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 import ast
14 14 from codeop import CommandCompiler, Compile
15 15 import re
16 16 import sys
17 17 import tokenize
18 18 from typing import List, Tuple, Optional, Any
19 19 import warnings
20 20
21 21 from IPython.utils import tokenutil
22 22
23 23 _indent_re = re.compile(r'^[ \t]+')
24 24
def leading_empty_lines(lines):
    """Drop empty or whitespace-only lines from the start of *lines*.

    Returns the slice starting at the first line with visible content.
    If there is no such line (including an empty input), *lines* itself is
    returned unchanged.
    """
    first_real = next(
        (ix for ix, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_real is None:
        return lines
    return lines[first_real:]
37 37
def leading_indent(lines):
    """Remove the first line's leading indentation from the whole cell.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line that begins with it; lines that do
    not start with the prefix are kept as they are.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if not match:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
53 53
class PromptStripper:
    """Callable that removes input prompts from a block of lines.

    Parameters
    ----------
    prompt_re : regular expression
        Matches any input prompt, continuation prompts (e.g. ``...``)
        included.
    initial_re : regular expression, optional
        Matches only the first-line prompt, not the continuation prompt.
        When omitted, ``prompt_re`` is used for the first line as well.
        Mostly useful for plain Python prompts (``>>>``): the continuation
        prompt ``...`` is a valid Python expression in Python 3 and must not
        be stripped on its own.

    Notes
    -----
    The first line is tested against ``initial_re`` and the second line (if
    any) against ``prompt_re``. If either matches, one prompt is stripped
    from every line of the block; otherwise the block is returned untouched.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        remove_prompt = self.prompt_re.sub
        return [remove_prompt('', text, count=1) for text in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
90 90
# Stripper for plain Python REPL prompts: prompt_re accepts both ``>>>`` and
# ``...``, while initial_re accepts only ``>>>`` on the first line.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Stripper for IPython prompts (``In [N]: `` and the ``...:`` continuation).
# No initial_re is given, so the same pattern is used for the first line.
ipython_prompt = PromptStripper(
    re.compile(
        r"""
        ^(                         # Match from the beginning of a line, either:

                                   # 1. First-line prompt:
        ((\[nav\]|\[ins\])?\ )?    # Vi editing mode prompt, if it's there
        In\                        # The 'In' of the prompt, with a space
        \[\d+\]:                   # Command index, as displayed in the prompt
        \                          # With a mandatory trailing space

        |                          # ... or ...

                                   # 2. The three dots of the multiline prompt
        \s*                        # All leading whitespace characters
        \.{3,}:                    # The three (or more) dots
        \ ?                        # With an optional trailing space

        )
        """,
        re.VERBOSE,
    )
)
119 119
120 120
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as one ``run_cell_magic()`` call.

    *lines* is returned unchanged when it is empty, when the first line
    does not start with ``%%``, or when the first line is a help request
    such as ``%%magic?``. Otherwise a one-element list is returned whose
    line calls ``get_ipython().run_cell_magic(name, first_line, body)``,
    where *body* is every line after the first, joined together.
    """
    if not lines:
        return lines
    header = lines[0]
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = ('get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body))
    return [call]
131 131
132 132
133 133 def _find_assign_op(token_line) -> Optional[int]:
134 134 """Get the index of the first assignment in the line ('=' not inside brackets)
135 135
136 136 Note: We don't try to support multiple special assignment (a = b = %foo)
137 137 """
138 138 paren_level = 0
139 139 for i, ti in enumerate(token_line):
140 140 s = ti.string
141 141 if s == '=' and paren_level == 0:
142 142 return i
143 143 if s in {'(','[','{'}:
144 144 paren_level += 1
145 145 elif s in {')', ']', '}'}:
146 146 if paren_level > 0:
147 147 paren_level -= 1
148 148 return None
149 149
def find_end_of_continued_line(lines, start_line: int):
    """Return the index of the line that ends a backslash continuation.

    Starting at *start_line* (0-indexed), advance while the current line
    ends with a backslash followed by a newline. If every remaining line is
    continued, the returned index equals ``len(lines)``.
    """
    total = len(lines)
    cursor = start_line
    while True:
        if not lines[cursor].endswith('\\\n'):
            return cursor
        cursor += 1
        if cursor >= total:
            return cursor
161 161
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued line into a single line.

    A continued line is made of lines ending in ``\`` plus the line that
    follows the last ``\``. For example::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    has four pieces, which are assembled into::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    Line numbers are 0-indexed and *start* is ``(lineno, colno)``; text
    before ``colno`` on the first line is left out. Every piece but the last
    has trailing whitespace and then its final character removed; the last
    piece only has trailing whitespace removed. Pieces are joined with a
    single space.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_row = start[0]
    first_col = start[1]
    pieces = [lines[first_row][first_col:]] + lines[first_row + 1:end_line + 1]
    final = pieces[-1]
    # Strip backslash+newline from every piece except the last one.
    cleaned = [piece.rstrip()[:-1] for piece in pieces[:-1]]
    # Strip only the newline from the last piece.
    cleaned.append(final.rstrip())
    return ' '.join(cleaned)
188 188
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax must not be transformed inside strings or comments, which
    is hard to guarantee with regexes. Instead the code is tokenised as
    Python and the special syntax is recognised in the tokens.

    IPython's special syntax is not valid Python, so tokenising may go wrong
    after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time; tokens are
    regenerated after each transformation to find the next one.

    Subclasses implement one class method (``find``) and one regular method
    (``transform``).

    The ``priority`` attribute selects which transformation applies when
    several match in the same place. Lower numbers have higher priority, so
    that "%magic?" becomes a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is (line, column) with a 1-indexed line; store it 0-indexed.
        self.start_line, self.start_col = start[0] - 1, start[1]

    def sortby(self):
        """Sort key: earliest position first, then highest priority."""
        key = (self.start_line, self.start_col, self.priority)
        return key

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        grouped into logical lines so that e.g. the first token of each line
        is easy to inspect.

        Implementations return an instance of the class pointing at the
        start position found, or None when there is no match.
        """
        raise NotImplementedError()

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``.

        Takes a list of strings representing physical lines and returns a
        similar list of transformed lines.
        """
        raise NotImplementedError()
239 239
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None when no
        line contains ``=`` followed by ``%`` and a NAME token.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Both the '%' (assign_ix + 1) and the magic name (assign_ix + 2)
            # are indexed below, so the line must hold more than
            # assign_ix + 2 tokens; a token group cut short right after '%'
            # must not raise IndexError.
            if (
                assign_ix is not None
                and len(line) > assign_ix + 2
                and line[assign_ix + 1].string == '%'
                and line[assign_ix + 2].type == tokenize.NAME
            ):
                return cls(line[assign_ix + 1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text before the ``%`` is kept as the left-hand side; the magic,
        including any backslash-continued lines, is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call on a single line.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
270 270
271 271
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""

    @classmethod
    def find_pre_312(cls, tokens_by_line):
        """Locate ``a = !cmd`` where ``!`` shows up as an ERRORTOKEN.

        After the assignment operator, whitespace-only ERRORTOKENs are
        skipped; the search on a line stops at the first token that is
        neither an ERRORTOKEN nor whitespace.
        """
        for tokens in tokens_by_line:
            eq_ix = _find_assign_op(tokens)
            if eq_ix is None:
                continue
            if tokens[eq_ix].line.strip().startswith('='):
                continue
            if len(tokens) < eq_ix + 2:
                continue
            for tok in tokens[eq_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    @classmethod
    def find_post_312(cls, tokens_by_line):
        """Locate ``a = !cmd`` where ``!`` shows up as an OP token."""
        for tokens in tokens_by_line:
            eq_ix = _find_assign_op(tokens)
            if eq_ix is None:
                continue
            if tokens[eq_ix].line.strip().startswith("="):
                continue
            if len(tokens) < eq_ix + 2:
                continue
            bang = tokens[eq_ix + 1]
            if bang.type == tokenize.OP and bang.string == "!":
                return cls(bang.start)
        return None

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell."""
        if sys.version_info < (3, 12):
            finder = cls.find_pre_312
        else:
            finder = cls.find_post_312
        return finder(tokens_by_line)

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text before the ``!`` is kept; the command, including any
        backslash-continued lines, becomes a single
        ``get_ipython().getoutput(cmd)`` call.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        replacement = prefix + "get_ipython().getoutput({!r})".format(cmd) + '\n'
        return lines[:row] + [replacement] + lines[last_row + 1:]
327 328
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These must NOT simply be changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# One-character escapes; EscapedCommand.find() checks the first significant
# token of a line against this set.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
# Two-character escapes; EscapedCommand.transform() tries these before
# falling back to a one-character escape.
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
346 347
347 348 def _make_help_call(target, esc):
348 349 """Prepares a pinfo(2)/psearch call from a target name and the escape
349 350 (i.e. ? or ??)"""
350 351 method = 'pinfo2' if esc == '??' \
351 352 else 'psearch' if '*' in target \
352 353 else 'pinfo'
353 354 arg = " ".join([method, target])
354 355 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
355 356 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
356 357 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
357 358 return "get_ipython().run_line_magic(%r, %r)" % (t_magic_name, t_magic_arg_s)
358 359
359 360
def _tr_help(content):
    """Translate lines escaped with: ?

    A bare ``?`` (empty *content*) shows the intro help screen via
    ``show_usage()``; anything else becomes a help call for *content*.
    """
    return _make_help_call(content, '?') if content else 'get_ipython().show_usage()'
369 370
def _tr_help2(content):
    """Translate lines escaped with: ??

    A bare ``??`` (empty *content*) shows the intro help screen via
    ``show_usage()``; anything else becomes a detailed help call for
    *content*.
    """
    return _make_help_call(content, '??') if content else 'get_ipython().show_usage()'
379 380
380 381 def _tr_magic(content):
381 382 "Translate lines escaped with a percent sign: %"
382 383 name, _, args = content.partition(' ')
383 384 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
384 385
385 386 def _tr_quote(content):
386 387 "Translate lines escaped with a comma: ,"
387 388 name, _, args = content.partition(' ')
388 389 return '%s("%s")' % (name, '", "'.join(args.split()) )
389 390
390 391 def _tr_quote2(content):
391 392 "Translate lines escaped with a semicolon: ;"
392 393 name, _, args = content.partition(' ')
393 394 return '%s("%s")' % (name, args)
394 395
395 396 def _tr_paren(content):
396 397 "Translate lines escaped with a slash: /"
397 398 name, _, args = content.partition(' ')
398 399 return '%s(%s)' % (name, ", ".join(args.split()))
399 400
# Dispatch table: escape prefix -> callable that turns the text following the
# escape into a line of Python source.  Looked up by
# EscapedCommand.transform(); ESC_MAGIC2 (%%) has no entry here.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
408 409
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Only the first token of each line that is not INDENT/DEDENT is
        examined. Returns an instance positioned at that token, or None.
        """
        layout = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            first = next((tok for tok in tokens if tok.type not in layout), None)
            if first is not None and first.string in ESCAPE_SINGLES:
                return cls(first.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escape (two characters if it is one of ESCAPE_DOUBLES, otherwise
        one) selects a translator from ``tr``; an escape without a translator
        yields an empty call. Indentation before the escape is preserved.
        """
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        text = assemble_continued_line(lines, (row, col), last_row)

        # A one-character text can never equal a two-character escape.
        width = 2 if text[:2] in ESCAPE_DOUBLES else 1
        escape, content = text[:width], text[width:]

        translate = tr.get(escape)
        call = translate(content) if translate is not None else ''

        replacement = indent + call + '\n'
        return lines[:row] + [replacement] + lines[last_row + 1:]
451 452
452 453
# Matches a help request at the end of a piece of text.  Group 1 is the
# target (optionally prefixed with up to two '%'); group 3 is the trailing
# '?' or '??'.  Used by HelpEnd.transform().
_help_end_re = re.compile(
    r"""(%{0,2}
    (?!\d)[\w*]+            # Variable name
    (\.(?!\d)[\w*]+|\[-?[0-9]+\])*       # .etc.etc or [0], we only support literal integers.
    )
    (\?\??)$                # ? or ??
    """,
    re.VERBOSE,
)
462 463
463 464
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is (line, column) of the '?' with a 1-indexed line.
        self.q_line, self.q_col = q_locn[0] - 1, q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A line qualifies when its last-but-one token is ``?`` (the last
        token being NEWLINE). The returned instance starts at the first
        token of that line that is not INDENT/DEDENT.
        """
        layout = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            if len(tokens) <= 2:
                continue
            question = tokens[-2]
            if question.string != '?':
                continue
            # The '?' itself is never INDENT/DEDENT, so this always succeeds.
            first = next(tok for tok in tokens if tok.type not in layout)
            return cls(first.start, question.start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text before the ``?`` does not look
        like a supported help target.
        """
        first_row, last_row = self.start_line, self.q_line
        piece = "".join(lines[first_row:last_row + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]

        found = _help_end_re.search(content)
        if found is None:
            raise SyntaxError(content)
        target, esc = found.group(1), found.group(3)

        replacement = indent + _make_help_call(target, esc) + '\n'
        return lines[:first_row] + [replacement] + lines[last_row + 1:]
509 510
def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group the tokens by logical line.

    The tokens of a multiline Python string or expression end up in a single
    group. Every line except the last should keep its line ending ('\\n',
    '\\r\\n') for this to work properly; use ``.splitlines(keepends=True)``
    when splitting a block of text for this function.

    A warning is emitted when more than one line is given and the first one
    has no line ending. Tokenizing stops quietly at ``tokenize.TokenError``
    and the tokens gathered so far are returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    grouped: List[List[Any]] = [[]]
    line_endings = ("\n", "\r", "\r\n", "\x0b", "\x0c")
    if len(lines) > 1 and not lines[0].endswith(line_endings):
        warnings.warn(
            "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
            stacklevel=2,
        )
    depth = 0
    try:
        token_stream = tokenutil.generate_tokens_catch_errors(
            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
        )
        for tok in token_stream:
            grouped[-1].append(tok)
            if tok.type == NEWLINE or (tok.type == NL and depth <= 0):
                grouped.append([])
            elif tok.string in {'(', '[', '{'}:
                depth += 1
            elif tok.string in {')', ']', '}'} and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the empty group opened after the final line break, if any.
    if not grouped[-1]:
        grouped.pop()

    return grouped
555 557
556 558
557 559 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
558 560 """Check if the depth of brackets in the list of tokens drops below 0"""
559 561 parenlev = 0
560 562 for token in tokens:
561 563 if token.string in {"(", "[", "{"}:
562 564 parenlev += 1
563 565 elif token.string in {")", "]", "}"}:
564 566 parenlev -= 1
565 567 if parenlev < 0:
566 568 return True
567 569 return False
568 570
569 571
def show_linewise_tokens(s: str):
    """Print the tokens of *s*, grouped by logical line (debugging aid).

    Deprecated since IPython 8.6: a DeprecationWarning is emitted on every
    call. A trailing newline is added to *s* when it lacks one.
    """
    warnings.warn(
        "show_linewise_tokens is deprecated since IPython 8.6",
        DeprecationWarning,
        stacklevel=2,
    )
    text = s if s.endswith("\n") else s + "\n"
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
584 586
# Arbitrary limit to prevent getting stuck in infinite loops.
# TransformerManager.do_token_transforms() raises RuntimeError once this many
# consecutive passes have each still changed the input.
TRANSFORM_LOOP_LIMIT = 500
587 589
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        """Set up the default lists of transformations."""
        # Callables taking and returning a list of lines; applied first.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Callables taking and returning a list of lines; applied after the
        # cleanup transforms.
        self.line_transforms = [
            cell_magic,
        ]
        # Classes providing find() and transform(); applied last, one match
        # at a time (see do_one_token_transform).
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        # At most one candidate per transformer class: its first match.
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest position first; priority breaks ties at the same position.
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until a pass leaves *lines* unchanged.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        # Scan backwards over trailing whitespace: a '\n' found before any
        # visible character counts as ending with a newline.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        # A backslash at the end of the last non-blank line means the user is
        # explicitly continuing the line.
        for line in reversed(lines):
            if not line.strip():
                continue
            elif line.strip("\n").endswith("\\"):
                return "incomplete", find_last_indent(lines)
            else:
                break

        # Transforms flagged with a truthy ``has_side_effects`` attribute are
        # skipped here.
        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if (
            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
        ):
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Drop trailing NEWLINE/COMMENT/ENDMARKER tokens so that the last
        # remaining token is the last piece of actual code.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            # Indent the next line 4 columns past the block opener's column.
            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                # Turn SyntaxWarning into an exception so it is reported as
                # 'invalid' below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        # The popped final token line started with a DEDENT: only complete if
        # the input ended with a newline.
        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
801 805
802 806
def find_last_indent(lines):
    """Return the indentation width of the last line, in columns.

    Each leading tab counts as four columns and each leading space as one.
    Returns 0 when the last line has no leading whitespace.
    """
    match = _indent_re.match(lines[-1])
    if not match:
        return 0
    leading = match.group(0)
    # A tab is replaced by four spaces, i.e. it adds three extra columns.
    return len(leading) + 3 * leading.count('\t')
808 812
809 813
class MaybeAsyncCompile(Compile):
    """``codeop.Compile`` variant with additional compiler flags.

    *extra_flags* is OR-ed into the flags set up by ``Compile.__init__``.
    """

    def __init__(self, extra_flags=0):
        super().__init__()
        self.flags = self.flags | extra_flags
814 818
815 819
class MaybeAsyncCommandCompiler(CommandCompiler):
    """``codeop.CommandCompiler`` whose compiler is a MaybeAsyncCompile.

    The base-class ``__init__`` is not called; only ``self.compiler`` is
    set, to a MaybeAsyncCompile built with *extra_flags*.
    """

    def __init__(self, extra_flags=0):
        compiler = MaybeAsyncCompile(extra_flags=extra_flags)
        self.compiler = compiler
820 824
# Extra compiler flag handed to the shared compiler below:
# ast.PyCF_ALLOW_TOP_LEVEL_AWAIT.
_extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT

# Module-level compiler instance; TransformerManager.check_complete() calls
# it with symbol='exec' as the final completeness check.
compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
@@ -1,447 +1,448
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import platform
8 8 import string
9 9 import sys
10 10 from textwrap import dedent
11 11
12 12 import pytest
13 13
14 14 from IPython.core import inputtransformer2 as ipt2
15 15 from IPython.core.inputtransformer2 import _find_assign_op, make_tokens_by_line
16 16
# Each fixture below is a triple:
#   (input lines, expected (line, column) of the construct -- line is
#    1-indexed, expected output lines after the transformation).
# The lines are spelled out one string per line so that leading whitespace,
# which the expected columns depend on, is explicit.

MULTILINE_MAGIC = (
    ["a = f()\n", "%foo \\\n", "bar\n", "g()\n"],
    (2, 0),
    ["a = f()\n", "get_ipython().run_line_magic('foo', ' bar')\n", "g()\n"],
)

# The magic sits inside the loop body, so "%" is at column 4 as the expected
# position (2, 4) requires.
INDENTED_MAGIC = (
    ["for a in range(5):\n", "    %ls\n"],
    (2, 4),
    ["for a in range(5):\n", "    get_ipython().run_line_magic('ls', '')\n"],
)

CRLF_MAGIC = (
    ["a = f()\n", "%ls\r\n", "g()\n"],
    (2, 0),
    ["a = f()\n", "get_ipython().run_line_magic('ls', '')\n", "g()\n"],
)

# NOTE(review): whitespace before "bar" on the continuation line (and inside
# the expected argument string) may have been collapsed -- confirm against
# the reference copy of this test module.
MULTILINE_MAGIC_ASSIGN = (
    ["a = f()\n", "b = %foo \\\n", "bar\n", "g()\n"],
    (2, 4),
    ["a = f()\n", "b = get_ipython().run_line_magic('foo', ' bar')\n", "g()\n"],
)

# NOTE(review): same caveat as MULTILINE_MAGIC_ASSIGN regarding the
# whitespace around "bar".
MULTILINE_SYSTEM_ASSIGN = (
    ["a = f()\n", "b = !foo \\\n", "bar\n", "g()\n"],
    (2, 4),
    ["a = f()\n", "b = get_ipython().getoutput('foo bar')\n", "g()\n"],
)

#####

# The assignment follows a dedent out of the inner loop body. With the
# function body indented by two columns, "!" lands on column 7, matching the
# expected position (4, 7).
# NOTE(review): the depth of the "print(i)" line only needs to exceed the
# "for" line; four columns is assumed here.
MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = (
    [
        "def test():\n",
        "  for i in range(1):\n",
        "    print(i)\n",
        "  res =! ls\n",
    ],
    (4, 7),
    [
        "def test():\n",
        "  for i in range(1):\n",
        "    print(i)\n",
        "  res =get_ipython().getoutput(' ls')\n",
    ],
)
109 109
110 110 ######
111 111
# Fixtures are (input lines, expected (line, column), expected output lines).

AUTOCALL_QUOTE = ([",f 1 2 3\n"], (1, 0), ['f("1", "2", "3")\n'])

AUTOCALL_QUOTE2 = ([";f 1 2 3\n"], (1, 0), ['f("1 2 3")\n'])

AUTOCALL_PAREN = (["/f 1 2 3\n"], (1, 0), ["f(1, 2, 3)\n"])

SIMPLE_HELP = (["foo?\n"], (1, 0), ["get_ipython().run_line_magic('pinfo', 'foo')\n"])

DETAILED_HELP = (
    ["foo??\n"],
    (1, 0),
    ["get_ipython().run_line_magic('pinfo2', 'foo')\n"],
)

MAGIC_HELP = (["%foo?\n"], (1, 0), ["get_ipython().run_line_magic('pinfo', '%foo')\n"])

HELP_IN_EXPR = (
    ["a = b + c?\n"],
    (1, 0),
    ["get_ipython().run_line_magic('pinfo', 'c')\n"],
)

HELP_CONTINUED_LINE = (
    ["a = \\\n", "zip?\n"],
    (1, 0),
    [r"get_ipython().run_line_magic('pinfo', 'zip')" + "\n"],
)

HELP_MULTILINE = (
    ["(a,\n", "b) = zip?\n"],
    (1, 0),
    [r"get_ipython().run_line_magic('pinfo', 'zip')" + "\n"],
)

# Non-ASCII identifier: Greek small letter pi (U+03C0).
HELP_UNICODE = (
    ["π.foo?\n"],
    (1, 0),
    ["get_ipython().run_line_magic('pinfo', 'π.foo')\n"],
)
161 161
162 162
def null_cleanup_transformer(lines):
    """Cleanup transform that discards its input and returns an empty list."""
    return list()
168 168
169 169
def test_check_make_token_by_line_never_ends_empty():
    """
    Check that no sequence of one or two printable characters leads to an
    empty final list of tokens.
    """
    alphabet = string.printable

    for first in alphabet:
        assert make_tokens_by_line(first)[-1] != []
        for second in alphabet:
            assert make_tokens_by_line(first + second)[-1] != []
180 180
181 181
def check_find(transformer, case, match=True):
    """Run ``transformer.find`` on a fixture and check where it matched.

    Parameters
    ----------
    transformer
        Object exposing ``find(tokens_by_line)``.
    case : tuple
        ``(sample lines, expected (line, column), expected output)``; only
        the first two items are used here.
    match : bool
        When false, assert that nothing is found instead.

    Returns
    -------
    The object returned by ``find`` when ``match`` is true, otherwise None.
    """
    sample, expected_start, _ = case
    found = transformer.find(make_tokens_by_line(sample))
    if not match:
        assert found is None
        return None
    # start_line is stored 0-indexed, expected values are 1-indexed
    assert (found.start_line + 1, found.start_col) == expected_start
    return found
192 192
193 193
def check_transform(transformer_cls, case):
    """Build ``transformer_cls`` at the fixture's start and compare its output.

    ``case`` is ``(input lines, start position, expected output lines)``.
    """
    source_lines, start, expected = case
    assert transformer_cls(start).transform(source_lines) == expected
198 198
199 199
def test_continued_line():
    """Continuation lines are located and joined into one logical line."""
    lines = MULTILINE_MAGIC_ASSIGN[0]

    end_line = ipt2.find_end_of_continued_line(lines, 1)
    assert end_line == 2

    # NOTE(review): the number of spaces between "foo" and "bar" depends on
    # the exact whitespace in MULTILINE_MAGIC_ASSIGN -- confirm.
    joined = ipt2.assemble_continued_line(lines, (1, 5), 2)
    assert joined == "foo bar"
205 205
206 206
def test_find_assign_magic():
    """MagicAssign finds ``x = %magic`` but not ``x = !cmd`` assignments."""
    check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
    for system_case in (
        MULTILINE_SYSTEM_ASSIGN,
        MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT,
    ):
        check_find(ipt2.MagicAssign, system_case, match=False)
211 211
212 212
def test_transform_assign_magic():
    """MagicAssign rewrites the magic-assignment fixture as expected."""
    transformer_cls = ipt2.MagicAssign
    check_transform(transformer_cls, MULTILINE_MAGIC_ASSIGN)
215 215
216 216
def test_find_assign_system():
    """SystemAssign finds ``x = !cmd`` assignments and reports the ``!`` column."""
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
    # The expected column is that of "!" (compare the "a=!ls" case, column 2),
    # so column 5 requires two spaces after "=".
    # NOTE(review): spacing restored from the expected column -- confirm.
    check_find(ipt2.SystemAssign, (["a =  !ls\n"], (1, 5), None))
    check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
    check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
223 223
224 224
def test_transform_assign_system():
    """SystemAssign rewrites both system-assignment fixtures as expected."""
    for case in (MULTILINE_SYSTEM_ASSIGN, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT):
        check_transform(ipt2.SystemAssign, case)
228 228
229 229
def test_find_magic_escape():
    """EscapedCommand finds line magics but ignores magic assignments."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC):
        check_find(ipt2.EscapedCommand, case)
    check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
234 234
235 235
def test_transform_magic_escape():
    """EscapedCommand rewrites plain, indented and CRLF-terminated magics."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC, CRLF_MAGIC):
        check_transform(ipt2.EscapedCommand, case)
240 240
241 241
def test_find_autocalls():
    """EscapedCommand finds each of the autocall prefixes."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for case in autocall_cases:
        sample = case[0]
        print("Testing %r" % sample)
        check_find(ipt2.EscapedCommand, case)
246 246
247 247
def test_transform_autocall():
    """EscapedCommand rewrites each autocall fixture into a plain call."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for case in autocall_cases:
        sample = case[0]
        print("Testing %r" % sample)
        check_transform(ipt2.EscapedCommand, case)
252 252
253 253
def test_find_help():
    """HelpEnd finds trailing ``?`` / ``??`` and records where the ``?`` is."""
    single_line_cases = (SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR)
    for case in single_line_cases:
        check_find(ipt2.HelpEnd, case)

    # Multi-line inputs: the question mark is on the second line (index 1).
    found = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
    assert found.q_line == 1
    assert found.q_col == 3

    found = check_find(ipt2.HelpEnd, HELP_MULTILINE)
    assert found.q_line == 1
    assert found.q_col == 8

    not_help = (
        # ? in a comment does not trigger help
        ["foo # bar?\n"],
        # Nor in a string
        ["foo = '''bar?\n"],
    )
    for sample in not_help:
        check_find(ipt2.HelpEnd, (sample, None, None), match=False)
270 270
271 271
def test_transform_help():
    """HelpEnd.transform turns each help fixture into its expected output."""
    # (start position, question-mark position, fixture)
    scenarios = [
        ((1, 0), (1, 9), HELP_IN_EXPR),
        ((1, 0), (2, 3), HELP_CONTINUED_LINE),
        ((1, 0), (2, 8), HELP_MULTILINE),
        ((1, 0), (1, 0), HELP_UNICODE),
    ]
    for start, q_loc, case in scenarios:
        tf = ipt2.HelpEnd(start, q_loc)
        assert tf.transform(case[0]) == case[2]
284 284
285 285
def test_find_assign_op_dedent():
    """
    Be careful that empty tokens such as dedent are not counted as parens.
    """

    class Tk:
        """Minimal token stand-in exposing only ``string``."""

        def __init__(self, s):
            self.string = s

    def as_tokens(*strings):
        return [Tk(s) for s in strings]

    assert _find_assign_op(as_tokens("", "a", "=", "b")) == 2
    nested = as_tokens("", "(", "a", "=", "b", ")", "=", "5")
    assert _find_assign_op(nested) == 6
299 299
300
# An extra closing paren is reported as invalid from Python 3.12 on and as
# incomplete on earlier versions.
extra_closing_paren_param = (
    pytest.param("(\n))", "invalid", None)
    if sys.version_info >= (3, 12)
    else pytest.param("(\n))", "incomplete", 0)
)

# (code, expected status, expected indent) triples for check_complete.
# NOTE(review): the leading whitespace in the "if a > 0:" and "hi" cases was
# restored to agree with the expected indents (8 and 3) -- confirm.
examples = [
    pytest.param("a = 1", "complete", None),
    pytest.param("for a in range(5):", "incomplete", 4),
    pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
    pytest.param("raise = 2", "invalid", None),
    pytest.param("a = [1,\n2,", "incomplete", 0),
    extra_closing_paren_param,
    pytest.param("\\\r\n", "incomplete", 0),
    pytest.param("a = '''\n   hi", "incomplete", 3),
    pytest.param("def a():\n x=1\n global x", "invalid", None),
    pytest.param(
        "a \\ ",
        "invalid",
        None,
        marks=pytest.mark.xfail(
            reason="Bug in python 3.9.8 – bpo 45738",
            condition=sys.version_info
            in [(3, 9, 8, "final", 0), (3, 11, 0, "alpha", 2)],
            raises=SystemError,
            strict=True,
        ),
    ),  # Nothing allowed after backslash,
    pytest.param("1\\\n+2", "complete", None),
]
329 330
330 331
@pytest.mark.parametrize("code, expected, number", examples)
def test_check_complete_param(code, expected, number):
    """check_complete returns the expected (status, indent) for each example."""
    manager = ipt2.TransformerManager()
    outcome = manager.check_complete(code)
    assert outcome == (expected, number)
335 336
336 337
@pytest.mark.xfail(platform.python_implementation() == "PyPy", reason="fail on pypy")
@pytest.mark.xfail(
    reason="Bug in python 3.9.8 – bpo 45738",
    condition=sys.version_info in [(3, 9, 8, "final", 0), (3, 11, 0, "alpha", 2)],
    raises=SystemError,
    strict=True,
)
def test_check_complete():
    """check_complete on an indented block, on short inputs, and after a backslash."""
    cc = ipt2.TransformerManager().check_complete

    # The body line is indented by four columns, which is the indent
    # check_complete is expected to suggest for the next line.
    example = dedent(
        """
        if True:
            a=1"""
    )

    assert cc(example) == ("incomplete", 4)
    assert cc(example + "\n") == ("complete", None)
    assert cc(example + "\n ") == ("complete", None)

    # no need to loop on all the letters/numbers.
    short = "12abAB" + string.printable[62:]
    for c in short:
        # test does not raise:
        cc(c)
        for k in short:
            cc(c + k)

    # NOTE(review): two-column indentation restored to agree with the
    # expected indent of 2 -- confirm.
    assert cc("def f():\n  x=0\n  \\\n  ") == ("incomplete", 2)
366 367
367 368
@pytest.mark.xfail(platform.python_implementation() == "PyPy", reason="fail on pypy")
@pytest.mark.parametrize(
    "value, expected",
    [
        # NOTE(review): four-column indentation restored to agree with the
        # expected indent of 4 -- confirm.
        ('''def foo():\n    """''', ("incomplete", 4)),
        ("""async with example:\n    pass""", ("incomplete", 4)),
        ("""async with example:\n    pass\n    """, ("complete", None)),
    ],
)
def test_check_complete_II(value, expected):
    """
    Test that multiple line strings are properly handled.

    Separate test function for convenience

    """
    cc = ipt2.TransformerManager().check_complete
    assert cc(value) == expected
386 387
387 388
@pytest.mark.parametrize(
    "value, expected",
    [
        (code, ("invalid", None))
        for code in (
            ")",
            "]",
            "}",
            ")(",
            "][",
            "}{",
            "]()(",
            "())(",
            ")[](",
            "()](",
        )
    ],
)
def test_check_complete_invalidates_sunken_brackets(value, expected):
    """
    Test that a single line with more closing brackets than opening ones is
    interpreted as invalid
    """
    manager = ipt2.TransformerManager()
    assert manager.check_complete(value) == expected
410 411
411 412
def test_null_cleanup_transformer():
    """transform_cell copes with a cleanup transform that returns no lines."""
    manager = ipt2.TransformerManager()
    manager.cleanup_transforms.insert(0, null_cleanup_transformer)
    result = manager.transform_cell("")
    assert result == ""
416 417
417 418
def test_side_effects_I():
    """check_complete does not run a cleanup transform flagged as having side effects."""
    calls = 0

    def counting_transform(lines):
        nonlocal calls
        calls += 1
        return lines

    counting_transform.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.cleanup_transforms.insert(0, counting_transform)
    status = manager.check_complete("a=1\n")
    assert status == ("complete", None)
    assert calls == 0
432 433
433 434
def test_side_effects_II():
    """check_complete does not run a line transform flagged as having side effects."""
    calls = 0

    def counting_transform(lines):
        nonlocal calls
        calls += 1
        return lines

    counting_transform.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.line_transforms.insert(0, counting_transform)
    status = manager.check_complete("b=1\n")
    assert status == ("complete", None)
    assert calls == 0
@@ -1,150 +1,155
1 1 """Token-related utilities"""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 from collections import namedtuple
7 7 from io import StringIO
8 8 from keyword import iskeyword
9 9
10 10 import tokenize
11 11
12 12
# Lightweight record for one token: type code, text, start/end positions and
# the source line it came from.
Token = namedtuple("Token", "token text start end line")
14 14
def generate_tokens(readline):
    """Yield tokens from *readline*, stopping quietly on ``tokenize.TokenError``.

    Wraps ``tokenize.generate_tokens``. Tokens produced before the error are
    still yielded; the error itself is swallowed.
    """
    try:
        yield from tokenize.generate_tokens(readline)
    except tokenize.TokenError:
        # e.g. the input ended in the middle of a statement
        return
23 23
24
def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
    """Yield tokens from *readline*, turning selected tokenizer errors into a token.

    Wraps ``tokenize.generate_tokens``. When a ``tokenize.TokenError`` is
    raised whose message contains one of the tolerated fragments, a final
    empty ``ERRORTOKEN`` is yielded instead; any other ``TokenError`` is
    re-raised.

    Parameters
    ----------
    readline : callable
        Line source, as accepted by ``tokenize.generate_tokens``.
    extra_errors_to_catch : list of str, optional
        Additional message fragments to tolerate on top of the defaults.

    Yields
    ------
    tokenize.TokenInfo
    """
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    # Only the most recent token is needed to position the error token, so
    # there is no reason to keep every token alive.
    last_token = None
    try:
        for token in tokenize.generate_tokens(readline):
            last_token = token
            yield token
    except tokenize.TokenError as exc:
        if not any(error in exc.args[0] for error in errors_to_catch):
            # Not a tolerated message: let the caller deal with it.
            raise
        if last_token is not None:
            # NOTE(review): this pairs the last token's start *row* with its
            # end *row* rather than forming a (row, col) position; kept as is
            # because callers may rely on it -- confirm intent.
            start = last_token.start[0], last_token.end[0]
            end = start
            line = last_token.line
        else:
            start = end = (1, 0)
            line = ""
        yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
53
def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
        When the cursor lies beyond the last line, the line is "" and the
        offset is the one reached after the last line.
    """
    line_start = 0
    for text in cell.splitlines(True):
        # A final line without a trailing newline is treated as if it had
        # one, so a cursor at the very end still counts as on that line.
        padding = 0 if text.endswith('\n') else 1
        line_end = line_start + len(text) + padding
        if line_end > cursor_pos:
            return (text, line_start)
        line_start = line_end
    return ("", line_start)
81 86
def token_at_cursor(cell, cursor_pos=0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : unicode
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------
    str
        The innermost open call's name if there is one, else the last
        (possibly dotted) name seen, else an empty string.
    """
    names = []
    seen = []
    call_names = []

    line_offsets = {1: 0}  # lines start at 1
    for raw in generate_tokens(StringIO(cell).readline):
        tok = Token(*raw)

        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in line_offsets:
            # keep track of offsets for each line
            physical_lines = tok.line.splitlines(True)
            for lineno, text in enumerate(physical_lines, start_line + 1):
                if lineno not in line_offsets:
                    line_offsets[lineno] = line_offsets[lineno - 1] + len(text)

        # allow '|foo' to find 'foo' at the beginning of a line
        if start_col == 0:
            boundary = cursor_pos + 1
        else:
            boundary = cursor_pos
        if line_offsets[start_line] + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break

        if tok.token == tokenize.NAME and not iskeyword(tok.text):
            after_dot = (
                names
                and seen
                and seen[-1].token == tokenize.OP
                and seen[-1].text == '.'
            )
            if after_dot:
                # extend the dotted name being built
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            elif tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        seen.append(tok)

        if line_offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    if names:
        return names[-1]
    return ''
General Comments 0
You need to be logged in to leave comments. Login now