##// END OF EJS Templates
Merge pull request #13436 from Carreau/fix-async-with...
Matthias Bussonnier -
r27443:6edc05a6 merge
parent child Browse files
Show More
@@ -1,796 +1,796 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 import ast
14 14 import sys
15 15 from codeop import CommandCompiler, Compile
16 16 import re
17 17 import tokenize
18 18 from typing import List, Tuple, Optional, Any
19 19 import warnings
20 20
21 21 _indent_re = re.compile(r'^[ \t]+')
22 22
23 23 def leading_empty_lines(lines):
24 24 """Remove leading empty lines
25 25
26 26 If the leading lines are empty or contain only whitespace, they will be
27 27 removed.
28 28 """
29 29 if not lines:
30 30 return lines
31 31 for i, line in enumerate(lines):
32 32 if line and not line.isspace():
33 33 return lines[i:]
34 34 return lines
35 35
36 36 def leading_indent(lines):
37 37 """Remove leading indentation.
38 38
39 39 If the first line starts with a spaces or tabs, the same whitespace will be
40 40 removed from each following line in the cell.
41 41 """
42 42 if not lines:
43 43 return lines
44 44 m = _indent_re.match(lines[0])
45 45 if not m:
46 46 return lines
47 47 space = m.group(0)
48 48 n = len(space)
49 49 return [l[n:] if l.startswith(space) else l
50 50 for l in lines]
51 51
52 52 class PromptStripper:
53 53 """Remove matching input prompts from a block of input.
54 54
55 55 Parameters
56 56 ----------
57 57 prompt_re : regular expression
58 58 A regular expression matching any input prompt (including continuation,
59 59 e.g. ``...``)
60 60 initial_re : regular expression, optional
61 61 A regular expression matching only the initial prompt, but not continuation.
62 62 If no initial expression is given, prompt_re will be used everywhere.
63 63 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
64 64 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
65 65
66 66 Notes
67 67 -----
68 68
69 69 If initial_re and prompt_re differ,
70 70 only initial_re will be tested against the first line.
71 71 If any prompt is found on the first two lines,
72 72 prompts will be stripped from the rest of the block.
73 73 """
74 74 def __init__(self, prompt_re, initial_re=None):
75 75 self.prompt_re = prompt_re
76 76 self.initial_re = initial_re or prompt_re
77 77
78 78 def _strip(self, lines):
79 79 return [self.prompt_re.sub('', l, count=1) for l in lines]
80 80
81 81 def __call__(self, lines):
82 82 if not lines:
83 83 return lines
84 84 if self.initial_re.match(lines[0]) or \
85 85 (len(lines) > 1 and self.prompt_re.match(lines[1])):
86 86 return self._strip(lines)
87 87 return lines
88 88
89 89 classic_prompt = PromptStripper(
90 90 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
91 91 initial_re=re.compile(r'^>>>( |$)')
92 92 )
93 93
94 94 ipython_prompt = PromptStripper(
95 95 re.compile(
96 96 r"""
97 97 ^( # Match from the beginning of a line, either:
98 98
99 99 # 1. First-line prompt:
100 100 ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
101 101 In\ # The 'In' of the prompt, with a space
102 102 \[\d+\]: # Command index, as displayed in the prompt
103 103 \ # With a mandatory trailing space
104 104
105 105 | # ... or ...
106 106
107 107 # 2. The three dots of the multiline prompt
108 108 \s* # All leading whitespace characters
109 109 \.{3,}: # The three (or more) dots
110 110 \ ? # With an optional trailing space
111 111
112 112 )
113 113 """,
114 114 re.VERBOSE,
115 115 )
116 116 )
117 117
118 118
119 119 def cell_magic(lines):
120 120 if not lines or not lines[0].startswith('%%'):
121 121 return lines
122 122 if re.match(r'%%\w+\?', lines[0]):
123 123 # This case will be handled by help_end
124 124 return lines
125 125 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
126 126 body = ''.join(lines[1:])
127 127 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
128 128 % (magic_name, first_line, body)]
129 129
130 130
131 131 def _find_assign_op(token_line) -> Optional[int]:
132 132 """Get the index of the first assignment in the line ('=' not inside brackets)
133 133
134 134 Note: We don't try to support multiple special assignment (a = b = %foo)
135 135 """
136 136 paren_level = 0
137 137 for i, ti in enumerate(token_line):
138 138 s = ti.string
139 139 if s == '=' and paren_level == 0:
140 140 return i
141 141 if s in {'(','[','{'}:
142 142 paren_level += 1
143 143 elif s in {')', ']', '}'}:
144 144 if paren_level > 0:
145 145 paren_level -= 1
146 146 return None
147 147
148 148 def find_end_of_continued_line(lines, start_line: int):
149 149 """Find the last line of a line explicitly extended using backslashes.
150 150
151 151 Uses 0-indexed line numbers.
152 152 """
153 153 end_line = start_line
154 154 while lines[end_line].endswith('\\\n'):
155 155 end_line += 1
156 156 if end_line >= len(lines):
157 157 break
158 158 return end_line
159 159
160 160 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
161 161 r"""Assemble a single line from multiple continued line pieces
162 162
163 163 Continued lines are lines ending in ``\``, and the line following the last
164 164 ``\`` in the block.
165 165
166 166 For example, this code continues over multiple lines::
167 167
168 168 if (assign_ix is not None) \
169 169 and (len(line) >= assign_ix + 2) \
170 170 and (line[assign_ix+1].string == '%') \
171 171 and (line[assign_ix+2].type == tokenize.NAME):
172 172
173 173 This statement contains four continued line pieces.
174 174 Assembling these pieces into a single line would give::
175 175
176 176 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
177 177
178 178 This uses 0-indexed line numbers. *start* is (lineno, colno).
179 179
180 180 Used to allow ``%magic`` and ``!system`` commands to be continued over
181 181 multiple lines.
182 182 """
183 183 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
184 184 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
185 185 + [parts[-1].rstrip()]) # Strip newline from last line
186 186
187 187 class TokenTransformBase:
188 188 """Base class for transformations which examine tokens.
189 189
190 190 Special syntax should not be transformed when it occurs inside strings or
191 191 comments. This is hard to reliably avoid with regexes. The solution is to
192 192 tokenise the code as Python, and recognise the special syntax in the tokens.
193 193
194 194 IPython's special syntax is not valid Python syntax, so tokenising may go
195 195 wrong after the special syntax starts. These classes therefore find and
196 196 transform *one* instance of special syntax at a time into regular Python
197 197 syntax. After each transformation, tokens are regenerated to find the next
198 198 piece of special syntax.
199 199
200 200 Subclasses need to implement one class method (find)
201 201 and one regular method (transform).
202 202
203 203 The priority attribute can select which transformation to apply if multiple
204 204 transformers match in the same place. Lower numbers have higher priority.
205 205 This allows "%magic?" to be turned into a help call rather than a magic call.
206 206 """
207 207 # Lower numbers -> higher priority (for matches in the same location)
208 208 priority = 10
209 209
210 210 def sortby(self):
211 211 return self.start_line, self.start_col, self.priority
212 212
213 213 def __init__(self, start):
214 214 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
215 215 self.start_col = start[1]
216 216
217 217 @classmethod
218 218 def find(cls, tokens_by_line):
219 219 """Find one instance of special syntax in the provided tokens.
220 220
221 221 Tokens are grouped into logical lines for convenience,
222 222 so it is easy to e.g. look at the first token of each line.
223 223 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
224 224
225 225 This should return an instance of its class, pointing to the start
226 226 position it has found, or None if it found no match.
227 227 """
228 228 raise NotImplementedError
229 229
230 230 def transform(self, lines: List[str]):
231 231 """Transform one instance of special syntax found by ``find()``
232 232
233 233 Takes a list of strings representing physical lines,
234 234 returns a similar list of transformed lines.
235 235 """
236 236 raise NotImplementedError
237 237
238 238 class MagicAssign(TokenTransformBase):
239 239 """Transformer for assignments from magics (a = %foo)"""
240 240 @classmethod
241 241 def find(cls, tokens_by_line):
242 242 """Find the first magic assignment (a = %foo) in the cell.
243 243 """
244 244 for line in tokens_by_line:
245 245 assign_ix = _find_assign_op(line)
246 246 if (assign_ix is not None) \
247 247 and (len(line) >= assign_ix + 2) \
248 248 and (line[assign_ix+1].string == '%') \
249 249 and (line[assign_ix+2].type == tokenize.NAME):
250 250 return cls(line[assign_ix+1].start)
251 251
252 252 def transform(self, lines: List[str]):
253 253 """Transform a magic assignment found by the ``find()`` classmethod.
254 254 """
255 255 start_line, start_col = self.start_line, self.start_col
256 256 lhs = lines[start_line][:start_col]
257 257 end_line = find_end_of_continued_line(lines, start_line)
258 258 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
259 259 assert rhs.startswith('%'), rhs
260 260 magic_name, _, args = rhs[1:].partition(' ')
261 261
262 262 lines_before = lines[:start_line]
263 263 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
264 264 new_line = lhs + call + '\n'
265 265 lines_after = lines[end_line+1:]
266 266
267 267 return lines_before + [new_line] + lines_after
268 268
269 269
270 270 class SystemAssign(TokenTransformBase):
271 271 """Transformer for assignments from system commands (a = !foo)"""
272 272 @classmethod
273 273 def find(cls, tokens_by_line):
274 274 """Find the first system assignment (a = !foo) in the cell.
275 275 """
276 276 for line in tokens_by_line:
277 277 assign_ix = _find_assign_op(line)
278 278 if (assign_ix is not None) \
279 279 and not line[assign_ix].line.strip().startswith('=') \
280 280 and (len(line) >= assign_ix + 2) \
281 281 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
282 282 ix = assign_ix + 1
283 283
284 284 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
285 285 if line[ix].string == '!':
286 286 return cls(line[ix].start)
287 287 elif not line[ix].string.isspace():
288 288 break
289 289 ix += 1
290 290
291 291 def transform(self, lines: List[str]):
292 292 """Transform a system assignment found by the ``find()`` classmethod.
293 293 """
294 294 start_line, start_col = self.start_line, self.start_col
295 295
296 296 lhs = lines[start_line][:start_col]
297 297 end_line = find_end_of_continued_line(lines, start_line)
298 298 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
299 299 assert rhs.startswith('!'), rhs
300 300 cmd = rhs[1:]
301 301
302 302 lines_before = lines[:start_line]
303 303 call = "get_ipython().getoutput({!r})".format(cmd)
304 304 new_line = lhs + call + '\n'
305 305 lines_after = lines[end_line + 1:]
306 306
307 307 return lines_before + [new_line] + lines_after
308 308
309 309 # The escape sequences that define the syntax transformations IPython will
310 310 # apply to user input. These can NOT be just changed here: many regular
311 311 # expressions and other parts of the code may use their hardcoded values, and
312 312 # for all intents and purposes they constitute the 'IPython syntax', so they
313 313 # should be considered fixed.
314 314
315 315 ESC_SHELL = '!' # Send line to underlying system shell
316 316 ESC_SH_CAP = '!!' # Send line to system shell and capture output
317 317 ESC_HELP = '?' # Find information about object
318 318 ESC_HELP2 = '??' # Find extra-detailed information about object
319 319 ESC_MAGIC = '%' # Call magic function
320 320 ESC_MAGIC2 = '%%' # Call cell-magic function
321 321 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
322 322 ESC_QUOTE2 = ';' # Quote all args as a single string, call
323 323 ESC_PAREN = '/' # Call first argument with rest of line as arguments
324 324
325 325 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
326 326 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
327 327
328 328 def _make_help_call(target, esc, next_input=None):
329 329 """Prepares a pinfo(2)/psearch call from a target name and the escape
330 330 (i.e. ? or ??)"""
331 331 method = 'pinfo2' if esc == '??' \
332 332 else 'psearch' if '*' in target \
333 333 else 'pinfo'
334 334 arg = " ".join([method, target])
335 335 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
336 336 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
337 337 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
338 338 if next_input is None:
339 339 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
340 340 else:
341 341 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
342 342 (next_input, t_magic_name, t_magic_arg_s)
343 343
344 344 def _tr_help(content):
345 345 """Translate lines escaped with: ?
346 346
347 347 A naked help line should fire the intro help screen (shell.show_usage())
348 348 """
349 349 if not content:
350 350 return 'get_ipython().show_usage()'
351 351
352 352 return _make_help_call(content, '?')
353 353
354 354 def _tr_help2(content):
355 355 """Translate lines escaped with: ??
356 356
357 357 A naked help line should fire the intro help screen (shell.show_usage())
358 358 """
359 359 if not content:
360 360 return 'get_ipython().show_usage()'
361 361
362 362 return _make_help_call(content, '??')
363 363
364 364 def _tr_magic(content):
365 365 "Translate lines escaped with a percent sign: %"
366 366 name, _, args = content.partition(' ')
367 367 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
368 368
369 369 def _tr_quote(content):
370 370 "Translate lines escaped with a comma: ,"
371 371 name, _, args = content.partition(' ')
372 372 return '%s("%s")' % (name, '", "'.join(args.split()) )
373 373
374 374 def _tr_quote2(content):
375 375 "Translate lines escaped with a semicolon: ;"
376 376 name, _, args = content.partition(' ')
377 377 return '%s("%s")' % (name, args)
378 378
379 379 def _tr_paren(content):
380 380 "Translate lines escaped with a slash: /"
381 381 name, _, args = content.partition(' ')
382 382 return '%s(%s)' % (name, ", ".join(args.split()))
383 383
384 384 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
385 385 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
386 386 ESC_HELP : _tr_help,
387 387 ESC_HELP2 : _tr_help2,
388 388 ESC_MAGIC : _tr_magic,
389 389 ESC_QUOTE : _tr_quote,
390 390 ESC_QUOTE2 : _tr_quote2,
391 391 ESC_PAREN : _tr_paren }
392 392
393 393 class EscapedCommand(TokenTransformBase):
394 394 """Transformer for escaped commands like %foo, !foo, or /foo"""
395 395 @classmethod
396 396 def find(cls, tokens_by_line):
397 397 """Find the first escaped command (%foo, !foo, etc.) in the cell.
398 398 """
399 399 for line in tokens_by_line:
400 400 if not line:
401 401 continue
402 402 ix = 0
403 403 ll = len(line)
404 404 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
405 405 ix += 1
406 406 if ix >= ll:
407 407 continue
408 408 if line[ix].string in ESCAPE_SINGLES:
409 409 return cls(line[ix].start)
410 410
411 411 def transform(self, lines):
412 412 """Transform an escaped line found by the ``find()`` classmethod.
413 413 """
414 414 start_line, start_col = self.start_line, self.start_col
415 415
416 416 indent = lines[start_line][:start_col]
417 417 end_line = find_end_of_continued_line(lines, start_line)
418 418 line = assemble_continued_line(lines, (start_line, start_col), end_line)
419 419
420 420 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
421 421 escape, content = line[:2], line[2:]
422 422 else:
423 423 escape, content = line[:1], line[1:]
424 424
425 425 if escape in tr:
426 426 call = tr[escape](content)
427 427 else:
428 428 call = ''
429 429
430 430 lines_before = lines[:start_line]
431 431 new_line = indent + call + '\n'
432 432 lines_after = lines[end_line + 1:]
433 433
434 434 return lines_before + [new_line] + lines_after
435 435
436 436 _help_end_re = re.compile(r"""(%{0,2}
437 437 (?!\d)[\w*]+ # Variable name
438 438 (\.(?!\d)[\w*]+)* # .etc.etc
439 439 )
440 440 (\?\??)$ # ? or ??
441 441 """,
442 442 re.VERBOSE)
443 443
444 444 class HelpEnd(TokenTransformBase):
445 445 """Transformer for help syntax: obj? and obj??"""
446 446 # This needs to be higher priority (lower number) than EscapedCommand so
447 447 # that inspecting magics (%foo?) works.
448 448 priority = 5
449 449
450 450 def __init__(self, start, q_locn):
451 451 super().__init__(start)
452 452 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
453 453 self.q_col = q_locn[1]
454 454
455 455 @classmethod
456 456 def find(cls, tokens_by_line):
457 457 """Find the first help command (foo?) in the cell.
458 458 """
459 459 for line in tokens_by_line:
460 460 # Last token is NEWLINE; look at last but one
461 461 if len(line) > 2 and line[-2].string == '?':
462 462 # Find the first token that's not INDENT/DEDENT
463 463 ix = 0
464 464 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
465 465 ix += 1
466 466 return cls(line[ix].start, line[-2].start)
467 467
468 468 def transform(self, lines):
469 469 """Transform a help command found by the ``find()`` classmethod.
470 470 """
471 471 piece = ''.join(lines[self.start_line:self.q_line+1])
472 472 indent, content = piece[:self.start_col], piece[self.start_col:]
473 473 lines_before = lines[:self.start_line]
474 474 lines_after = lines[self.q_line + 1:]
475 475
476 476 m = _help_end_re.search(content)
477 477 if not m:
478 478 raise SyntaxError(content)
479 479 assert m is not None, content
480 480 target = m.group(1)
481 481 esc = m.group(3)
482 482
483 483 # If we're mid-command, put it back on the next prompt for the user.
484 484 next_input = None
485 485 if (not lines_before) and (not lines_after) \
486 486 and content.strip() != m.group(0):
487 487 next_input = content.rstrip('?\n')
488 488
489 489 call = _make_help_call(target, esc, next_input=next_input)
490 490 new_line = indent + call + '\n'
491 491
492 492 return lines_before + [new_line] + lines_after
493 493
494 494 def make_tokens_by_line(lines:List[str]):
495 495 """Tokenize a series of lines and group tokens by line.
496 496
497 497 The tokens for a multiline Python string or expression are grouped as one
498 498 line. All lines except the last lines should keep their line ending ('\\n',
499 499 '\\r\\n') for this to properly work. Use `.splitlines(keeplineending=True)`
500 500 for example when passing block of text to this function.
501 501
502 502 """
503 503 # NL tokens are used inside multiline expressions, but also after blank
504 504 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
505 505 # We want to group the former case together but split the latter, so we
506 506 # track parentheses level, similar to the internals of tokenize.
507 507
508 508 # reexported from token on 3.7+
509 509 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
510 510 tokens_by_line:List[List[Any]] = [[]]
511 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
512 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
511 if len(lines) > 1 and not lines[0].endswith(("\n", "\r", "\r\n", "\x0b", "\x0c")):
512 warnings.warn(
513 "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
514 stacklevel=2,
515 )
513 516 parenlev = 0
514 517 try:
515 518 for token in tokenize.generate_tokens(iter(lines).__next__):
516 519 tokens_by_line[-1].append(token)
517 520 if (token.type == NEWLINE) \
518 521 or ((token.type == NL) and (parenlev <= 0)):
519 522 tokens_by_line.append([])
520 523 elif token.string in {'(', '[', '{'}:
521 524 parenlev += 1
522 525 elif token.string in {')', ']', '}'}:
523 526 if parenlev > 0:
524 527 parenlev -= 1
525 528 except tokenize.TokenError:
526 529 # Input ended in a multiline string or expression. That's OK for us.
527 530 pass
528 531
529 532
530 533 if not tokens_by_line[-1]:
531 534 tokens_by_line.pop()
532 535
533 536
534 537 return tokens_by_line
535 538
536 539
537 540 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
538 541 """Check if the depth of brackets in the list of tokens drops below 0"""
539 542 parenlev = 0
540 543 for token in tokens:
541 544 if token.string in {"(", "[", "{"}:
542 545 parenlev += 1
543 546 elif token.string in {")", "]", "}"}:
544 547 parenlev -= 1
545 548 if parenlev < 0:
546 549 return True
547 550 return False
548 551
549 552
550 553 def show_linewise_tokens(s: str):
551 554 """For investigation and debugging"""
552 555 if not s.endswith('\n'):
553 556 s += '\n'
554 557 lines = s.splitlines(keepends=True)
555 558 for line in make_tokens_by_line(lines):
556 559 print("Line -------")
557 560 for tokinfo in line:
558 561 print(" ", tokinfo)
559 562
560 563 # Arbitrary limit to prevent getting stuck in infinite loops
561 564 TRANSFORM_LOOP_LIMIT = 500
562 565
563 566 class TransformerManager:
564 567 """Applies various transformations to a cell or code block.
565 568
566 569 The key methods for external use are ``transform_cell()``
567 570 and ``check_complete()``.
568 571 """
569 572 def __init__(self):
570 573 self.cleanup_transforms = [
571 574 leading_empty_lines,
572 575 leading_indent,
573 576 classic_prompt,
574 577 ipython_prompt,
575 578 ]
576 579 self.line_transforms = [
577 580 cell_magic,
578 581 ]
579 582 self.token_transformers = [
580 583 MagicAssign,
581 584 SystemAssign,
582 585 EscapedCommand,
583 586 HelpEnd,
584 587 ]
585 588
586 589 def do_one_token_transform(self, lines):
587 590 """Find and run the transform earliest in the code.
588 591
589 592 Returns (changed, lines).
590 593
591 594 This method is called repeatedly until changed is False, indicating
592 595 that all available transformations are complete.
593 596
594 597 The tokens following IPython special syntax might not be valid, so
595 598 the transformed code is retokenised every time to identify the next
596 599 piece of special syntax. Hopefully long code cells are mostly valid
597 600 Python, not using lots of IPython special syntax, so this shouldn't be
598 601 a performance issue.
599 602 """
600 603 tokens_by_line = make_tokens_by_line(lines)
601 604 candidates = []
602 605 for transformer_cls in self.token_transformers:
603 606 transformer = transformer_cls.find(tokens_by_line)
604 607 if transformer:
605 608 candidates.append(transformer)
606 609
607 610 if not candidates:
608 611 # Nothing to transform
609 612 return False, lines
610 613 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
611 614 for transformer in ordered_transformers:
612 615 try:
613 616 return True, transformer.transform(lines)
614 617 except SyntaxError:
615 618 pass
616 619 return False, lines
617 620
618 621 def do_token_transforms(self, lines):
619 622 for _ in range(TRANSFORM_LOOP_LIMIT):
620 623 changed, lines = self.do_one_token_transform(lines)
621 624 if not changed:
622 625 return lines
623 626
624 627 raise RuntimeError("Input transformation still changing after "
625 628 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
626 629
627 630 def transform_cell(self, cell: str) -> str:
628 631 """Transforms a cell of input code"""
629 632 if not cell.endswith('\n'):
630 633 cell += '\n' # Ensure the cell has a trailing newline
631 634 lines = cell.splitlines(keepends=True)
632 635 for transform in self.cleanup_transforms + self.line_transforms:
633 636 lines = transform(lines)
634 637
635 638 lines = self.do_token_transforms(lines)
636 639 return ''.join(lines)
637 640
638 641 def check_complete(self, cell: str):
639 642 """Return whether a block of code is ready to execute, or should be continued
640 643
641 644 Parameters
642 645 ----------
643 646 cell : string
644 647 Python input code, which can be multiline.
645 648
646 649 Returns
647 650 -------
648 651 status : str
649 652 One of 'complete', 'incomplete', or 'invalid' if source is not a
650 653 prefix of valid code.
651 654 indent_spaces : int or None
652 655 The number of spaces by which to indent the next line of code. If
653 656 status is not 'incomplete', this is None.
654 657 """
655 658 # Remember if the lines ends in a new line.
656 659 ends_with_newline = False
657 660 for character in reversed(cell):
658 661 if character == '\n':
659 662 ends_with_newline = True
660 663 break
661 664 elif character.strip():
662 665 break
663 666 else:
664 667 continue
665 668
666 669 if not ends_with_newline:
667 670 # Append an newline for consistent tokenization
668 671 # See https://bugs.python.org/issue33899
669 672 cell += '\n'
670 673
671 674 lines = cell.splitlines(keepends=True)
672 675
673 676 if not lines:
674 677 return 'complete', None
675 678
676 679 if lines[-1].endswith('\\'):
677 680 # Explicit backslash continuation
678 681 return 'incomplete', find_last_indent(lines)
679 682
680 683 try:
681 684 for transform in self.cleanup_transforms:
682 685 if not getattr(transform, 'has_side_effects', False):
683 686 lines = transform(lines)
684 687 except SyntaxError:
685 688 return 'invalid', None
686 689
687 690 if lines[0].startswith('%%'):
688 691 # Special case for cell magics - completion marked by blank line
689 692 if lines[-1].strip():
690 693 return 'incomplete', find_last_indent(lines)
691 694 else:
692 695 return 'complete', None
693 696
694 697 try:
695 698 for transform in self.line_transforms:
696 699 if not getattr(transform, 'has_side_effects', False):
697 700 lines = transform(lines)
698 701 lines = self.do_token_transforms(lines)
699 702 except SyntaxError:
700 703 return 'invalid', None
701 704
702 705 tokens_by_line = make_tokens_by_line(lines)
703 706
704 707 # Bail if we got one line and there are more closing parentheses than
705 708 # the opening ones
706 709 if (
707 710 len(lines) == 1
708 711 and tokens_by_line
709 712 and has_sunken_brackets(tokens_by_line[0])
710 713 ):
711 714 return "invalid", None
712 715
713 716 if not tokens_by_line:
714 717 return 'incomplete', find_last_indent(lines)
715 718
716 719 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
717 720 # We're in a multiline string or expression
718 721 return 'incomplete', find_last_indent(lines)
719 722
720 723 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
721 724
722 725 # Pop the last line which only contains DEDENTs and ENDMARKER
723 726 last_token_line = None
724 727 if {t.type for t in tokens_by_line[-1]} in [
725 728 {tokenize.DEDENT, tokenize.ENDMARKER},
726 729 {tokenize.ENDMARKER}
727 730 ] and len(tokens_by_line) > 1:
728 731 last_token_line = tokens_by_line.pop()
729 732
730 733 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
731 734 tokens_by_line[-1].pop()
732 735
733 736 if not tokens_by_line[-1]:
734 737 return 'incomplete', find_last_indent(lines)
735 738
736 739 if tokens_by_line[-1][-1].string == ':':
737 740 # The last line starts a block (e.g. 'if foo:')
738 741 ix = 0
739 742 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
740 743 ix += 1
741 744
742 745 indent = tokens_by_line[-1][ix].start[1]
743 746 return 'incomplete', indent + 4
744 747
745 748 if tokens_by_line[-1][0].line.endswith('\\'):
746 749 return 'incomplete', None
747 750
748 751 # At this point, our checks think the code is complete (or invalid).
749 752 # We'll use codeop.compile_command to check this with the real parser
750 753 try:
751 754 with warnings.catch_warnings():
752 755 warnings.simplefilter('error', SyntaxWarning)
753 756 res = compile_command(''.join(lines), symbol='exec')
754 757 except (SyntaxError, OverflowError, ValueError, TypeError,
755 758 MemoryError, SyntaxWarning):
756 759 return 'invalid', None
757 760 else:
758 761 if res is None:
759 762 return 'incomplete', find_last_indent(lines)
760 763
761 764 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
762 765 if ends_with_newline:
763 766 return 'complete', None
764 767 return 'incomplete', find_last_indent(lines)
765 768
766 769 # If there's a blank line at the end, assume we're ready to execute
767 770 if not lines[-1].strip():
768 771 return 'complete', None
769 772
770 773 return 'complete', None
771 774
772 775
773 776 def find_last_indent(lines):
774 777 m = _indent_re.match(lines[-1])
775 778 if not m:
776 779 return 0
777 780 return len(m.group(0).replace('\t', ' '*4))
778 781
779 782
780 783 class MaybeAsyncCompile(Compile):
781 784 def __init__(self, extra_flags=0):
782 785 super().__init__()
783 786 self.flags |= extra_flags
784 787
785 def __call__(self, *args, **kwds):
786 return compile(*args, **kwds)
787
788 788
789 789 class MaybeAsyncCommandCompiler(CommandCompiler):
790 790 def __init__(self, extra_flags=0):
791 791 self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
792 792
793 793
794 794 _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
795 795
796 796 compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
@@ -1,388 +1,405 b''
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 import platform
7 8 import string
8 9 import sys
9 10 from textwrap import dedent
10 11
11 12 import pytest
12 13
13 14 from IPython.core import inputtransformer2 as ipt2
14 15 from IPython.core.inputtransformer2 import _find_assign_op, make_tokens_by_line
15 16
16 17 MULTILINE_MAGIC = ("""\
17 18 a = f()
18 19 %foo \\
19 20 bar
20 21 g()
21 22 """.splitlines(keepends=True), (2, 0), """\
22 23 a = f()
23 24 get_ipython().run_line_magic('foo', ' bar')
24 25 g()
25 26 """.splitlines(keepends=True))
26 27
27 28 INDENTED_MAGIC = ("""\
28 29 for a in range(5):
29 30 %ls
30 31 """.splitlines(keepends=True), (2, 4), """\
31 32 for a in range(5):
32 33 get_ipython().run_line_magic('ls', '')
33 34 """.splitlines(keepends=True))
34 35
35 36 CRLF_MAGIC = ([
36 37 "a = f()\n",
37 38 "%ls\r\n",
38 39 "g()\n"
39 40 ], (2, 0), [
40 41 "a = f()\n",
41 42 "get_ipython().run_line_magic('ls', '')\n",
42 43 "g()\n"
43 44 ])
44 45
45 46 MULTILINE_MAGIC_ASSIGN = ("""\
46 47 a = f()
47 48 b = %foo \\
48 49 bar
49 50 g()
50 51 """.splitlines(keepends=True), (2, 4), """\
51 52 a = f()
52 53 b = get_ipython().run_line_magic('foo', ' bar')
53 54 g()
54 55 """.splitlines(keepends=True))
55 56
56 57 MULTILINE_SYSTEM_ASSIGN = ("""\
57 58 a = f()
58 59 b = !foo \\
59 60 bar
60 61 g()
61 62 """.splitlines(keepends=True), (2, 4), """\
62 63 a = f()
63 64 b = get_ipython().getoutput('foo bar')
64 65 g()
65 66 """.splitlines(keepends=True))
66 67
67 68 #####
68 69
69 70 MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\
70 71 def test():
71 72 for i in range(1):
72 73 print(i)
73 74 res =! ls
74 75 """.splitlines(keepends=True), (4, 7), '''\
75 76 def test():
76 77 for i in range(1):
77 78 print(i)
78 79 res =get_ipython().getoutput(\' ls\')
79 80 '''.splitlines(keepends=True))
80 81
81 82 ######
82 83
83 84 AUTOCALL_QUOTE = (
84 85 [",f 1 2 3\n"], (1, 0),
85 86 ['f("1", "2", "3")\n']
86 87 )
87 88
88 89 AUTOCALL_QUOTE2 = (
89 90 [";f 1 2 3\n"], (1, 0),
90 91 ['f("1 2 3")\n']
91 92 )
92 93
93 94 AUTOCALL_PAREN = (
94 95 ["/f 1 2 3\n"], (1, 0),
95 96 ['f(1, 2, 3)\n']
96 97 )
97 98
98 99 SIMPLE_HELP = (
99 100 ["foo?\n"], (1, 0),
100 101 ["get_ipython().run_line_magic('pinfo', 'foo')\n"]
101 102 )
102 103
103 104 DETAILED_HELP = (
104 105 ["foo??\n"], (1, 0),
105 106 ["get_ipython().run_line_magic('pinfo2', 'foo')\n"]
106 107 )
107 108
108 109 MAGIC_HELP = (
109 110 ["%foo?\n"], (1, 0),
110 111 ["get_ipython().run_line_magic('pinfo', '%foo')\n"]
111 112 )
112 113
113 114 HELP_IN_EXPR = (
114 115 ["a = b + c?\n"], (1, 0),
115 116 ["get_ipython().set_next_input('a = b + c');"
116 117 "get_ipython().run_line_magic('pinfo', 'c')\n"]
117 118 )
118 119
119 120 HELP_CONTINUED_LINE = ("""\
120 121 a = \\
121 122 zip?
122 123 """.splitlines(keepends=True), (1, 0),
123 124 [r"get_ipython().set_next_input('a = \\\nzip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
124 125 )
125 126
126 127 HELP_MULTILINE = ("""\
127 128 (a,
128 129 b) = zip?
129 130 """.splitlines(keepends=True), (1, 0),
130 131 [r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
131 132 )
132 133
# Help request on an attribute of a non-ASCII identifier:
# (input lines, expected 1-indexed start position, expected output lines).
HELP_UNICODE = (
    ["π.foo?\n"], (1, 0),
    ["get_ipython().run_line_magic('pinfo', 'π.foo')\n"]
)
137 138
138 139
def null_cleanup_transformer(lines):
    """Cleanup transform that ignores *lines* and produces an empty list."""
    return list()
144 145
145 146
def test_check_make_token_by_line_never_ends_empty():
    """
    Check that no sequence of one or two printable characters makes
    ``make_tokens_by_line`` end with an empty list of tokens.
    """
    from string import printable

    for first in printable:
        assert make_tokens_by_line(first)[-1] != []
        for second in printable:
            pair = first + second
            assert make_tokens_by_line(pair)[-1] != []
155 156
156 157
def check_find(transformer, case, match=True):
    """Run ``transformer.find`` on a test case and check where it matched.

    *case* is a ``(sample lines, expected start, expected output)`` triple;
    the last element is not used here. With ``match=True`` the found start
    position must equal the expected one and the find result is returned.
    With ``match=False`` the transformer must find nothing.
    """
    source_lines, want_start, _ = case
    found = transformer.find(make_tokens_by_line(source_lines))
    if not match:
        assert found is None
        return None
    # start_line is stored 0-indexed, expected values are 1-indexed
    got_start = (found.start_line + 1, found.start_col)
    assert got_start == want_start
    return found
167 168
def check_transform(transformer_cls, case):
    """Instantiate *transformer_cls* at the case's start and check its output.

    *case* is a ``(input lines, start position, expected lines)`` triple.
    """
    source_lines, start_pos, want_lines = case
    produced = transformer_cls(start_pos).transform(source_lines)
    assert produced == want_lines
172 173
def test_continued_line():
    """Check locating and joining a backslash-continued line."""
    magic_lines = MULTILINE_MAGIC_ASSIGN[0]

    last_line = ipt2.find_end_of_continued_line(magic_lines, 1)
    assert last_line == 2

    joined = ipt2.assemble_continued_line(magic_lines, (1, 5), 2)
    assert joined == "foo bar"
178 179
def test_find_assign_magic():
    """MagicAssign finds ``b = %foo`` but not the ``!system`` assignments."""
    check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
    for system_case in (
        MULTILINE_SYSTEM_ASSIGN,
        MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT,
    ):
        check_find(ipt2.MagicAssign, system_case, match=False)
183 184
def test_transform_assign_magic():
    """MagicAssign rewrites the multi-line magic assignment as expected."""
    case = MULTILINE_MAGIC_ASSIGN
    check_transform(ipt2.MagicAssign, case)
186 187
def test_find_assign_system():
    """SystemAssign finds ``x = !cmd`` assignments and reports where ``!`` is.

    The expected start column is the column of the ``!``; the third case
    therefore needs two spaces between ``=`` and ``!`` to land on column 5.
    """
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
    check_find(ipt2.SystemAssign, (["a =  !ls\n"], (1, 5), None))
    check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
    check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
193 194
def test_transform_assign_system():
    """SystemAssign rewrites both system-assignment fixtures as expected."""
    for system_case in (
        MULTILINE_SYSTEM_ASSIGN,
        MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT,
    ):
        check_transform(ipt2.SystemAssign, system_case)
197 198
def test_find_magic_escape():
    """EscapedCommand finds line magics but not a magic assignment."""
    for escaped_case in (MULTILINE_MAGIC, INDENTED_MAGIC):
        check_find(ipt2.EscapedCommand, escaped_case)
    check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
202 203
def test_transform_magic_escape():
    """EscapedCommand rewrites multi-line, indented and CRLF magics."""
    for escaped_case in (MULTILINE_MAGIC, INDENTED_MAGIC, CRLF_MAGIC):
        check_transform(ipt2.EscapedCommand, escaped_case)
207 208
def test_find_autocalls():
    """EscapedCommand finds each of the autocall escape fixtures."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for autocall in autocall_cases:
        print("Testing %r" % autocall[0])
        check_find(ipt2.EscapedCommand, autocall)
212 213
def test_transform_autocall():
    """EscapedCommand rewrites each of the autocall escape fixtures."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for autocall in autocall_cases:
        print("Testing %r" % autocall[0])
        check_transform(ipt2.EscapedCommand, autocall)
217 218
def test_find_help():
    """HelpEnd finds trailing ``?`` help requests and where the ``?`` sits."""
    single_line_cases = (SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR)
    for help_case in single_line_cases:
        check_find(ipt2.HelpEnd, help_case)

    continued = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
    assert continued.q_line == 1
    assert continued.q_col == 3

    multiline = check_find(ipt2.HelpEnd, HELP_MULTILINE)
    assert multiline.q_line == 1
    assert multiline.q_col == 8

    # Neither a ? inside a comment nor one inside a string triggers help
    in_comment = (["foo # bar?\n"], None, None)
    in_string = (["foo = '''bar?\n"], None, None)
    check_find(ipt2.HelpEnd, in_comment, match=False)
    check_find(ipt2.HelpEnd, in_string, match=False)
234 235
def test_transform_help():
    """HelpEnd rewrites each help fixture into its expected output lines."""
    # (start position, position of the question mark, fixture)
    scenarios = (
        ((1, 0), (1, 9), HELP_IN_EXPR),
        ((1, 0), (2, 3), HELP_CONTINUED_LINE),
        ((1, 0), (2, 8), HELP_MULTILINE),
        ((1, 0), (1, 0), HELP_UNICODE),
    )
    for start, q_locn, fixture in scenarios:
        transformer = ipt2.HelpEnd(start, q_locn)
        assert transformer.transform(fixture[0]) == fixture[2]
247 248
def test_find_assign_op_dedent():
    """
    Empty tokens such as a dedent must not be counted as parentheses when
    looking for the assignment operator.
    """

    class FakeToken:
        def __init__(self, text):
            self.string = text

    def as_tokens(*texts):
        return [FakeToken(text) for text in texts]

    assert _find_assign_op(as_tokens("", "a", "=", "b")) == 2
    assert _find_assign_op(as_tokens("", "(", "a", "=", "b", ")", "=", "5")) == 6
260 261
261 262
# (code, expected status, expected indent) cases for ``check_complete``.
# Indentation inside the literals matters: the expected indent is derived
# from the leading whitespace of the last line.
examples = [
    pytest.param("a = 1", "complete", None),
    pytest.param("for a in range(5):", "incomplete", 4),
    pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
    pytest.param("raise = 2", "invalid", None),
    pytest.param("a = [1,\n2,", "incomplete", 0),
    pytest.param("(\n))", "incomplete", 0),
    pytest.param("\\\r\n", "incomplete", 0),
    pytest.param("a = '''\n   hi", "incomplete", 3),
    pytest.param("def a():\n x=1\n global x", "invalid", None),
    pytest.param(
        "a \\ ",
        "invalid",
        None,
        marks=pytest.mark.xfail(
            reason="Bug in python 3.9.8 – bpo 45738",
            condition=sys.version_info
            in [(3, 9, 8, "final", 0), (3, 11, 0, "alpha", 2)],
            raises=SystemError,
            strict=True,
        ),
    ),  # Nothing allowed after backslash,
    pytest.param("1\\\n+2", "complete", None),
]
286 287
287 288
@pytest.mark.parametrize("code, expected, number", examples)
def test_check_complete_param(code, expected, number):
    """check_complete returns the expected (status, indent) for each example."""
    manager = ipt2.TransformerManager()
    outcome = manager.check_complete(code)
    assert outcome == (expected, number)
292 293
293 294
@pytest.mark.xfail(platform.python_implementation() == "PyPy", reason="fail on pypy")
@pytest.mark.xfail(
    reason="Bug in python 3.9.8 – bpo 45738",
    condition=sys.version_info in [(3, 9, 8, "final", 0), (3, 11, 0, "alpha", 2)],
    raises=SystemError,
    strict=True,
)
def test_check_complete():
    """Check completeness detection on an indented block and short inputs.

    The leading whitespace inside the literals is significant: the
    expected indent values are read from it.
    """
    cc = ipt2.TransformerManager().check_complete

    example = dedent("""
        if True:
            a=1""" )

    assert cc(example) == ("incomplete", 4)
    assert cc(example + "\n") == ("complete", None)
    assert cc(example + "\n    ") == ("complete", None)

    # no need to loop on all the letters/numbers.
    short = '12abAB'+string.printable[62:]
    for c in short:
        # test does not raise:
        cc(c)
        for k in short:
            cc(c+k)

    assert cc("def f():\n  x=0\n  \\\n  ") == ("incomplete", 2)
321 323
@pytest.mark.xfail(platform.python_implementation() == "PyPy", reason="fail on pypy")
@pytest.mark.parametrize(
    "value, expected",
    [
        ('''def foo():\n    """''', ("incomplete", 4)),
        ("""async with example:\n    pass""", ("incomplete", 4)),
        ("""async with example:\n    pass\n    """, ("complete", None)),
    ],
)
def test_check_complete_II(value, expected):
    """
    Test that multiple line strings and ``async with`` blocks are properly
    handled.

    Separate test function for convenience

    """
    cc = ipt2.TransformerManager().check_complete
    assert cc(value) == expected
342
343
@pytest.mark.parametrize(
    "value, expected",
    [
        (")", ("invalid", None)),
        ("]", ("invalid", None)),
        ("}", ("invalid", None)),
        (")(", ("invalid", None)),
        ("][", ("invalid", None)),
        ("}{", ("invalid", None)),
        ("]()(", ("invalid", None)),
        ("())(", ("invalid", None)),
        (")[](", ("invalid", None)),
        ("()](", ("invalid", None)),
    ],
)
def test_check_complete_invalidates_sunken_brackets(value, expected):
    """
    Test that a single line with more closing brackets than the opening ones is
    interpreted as invalid
    """
    cc = ipt2.TransformerManager().check_complete
    assert cc(value) == expected
349 366
350 367
def test_null_cleanup_transformer():
    """An empty cell stays empty when a cleanup transform returns no lines."""
    transformer_manager = ipt2.TransformerManager()
    transformer_manager.cleanup_transforms.insert(0, null_cleanup_transformer)
    transformed = transformer_manager.transform_cell("")
    assert transformed == ""
355 372
356 373
357 374
358 375
def test_side_effects_I():
    """check_complete must not call a cleanup transform that has side effects."""
    calls = []

    def counter(lines):
        calls.append(1)
        return lines

    counter.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.cleanup_transforms.insert(0, counter)
    assert manager.check_complete("a=1\n") == ('complete', None)
    assert len(calls) == 0
372 389
373 390
374 391
375 392
def test_side_effects_II():
    """check_complete must not call a line transform that has side effects."""
    calls = []

    def counter(lines):
        calls.append(1)
        return lines

    counter.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.line_transforms.insert(0, counter)
    assert manager.check_complete("b=1\n") == ('complete', None)
    assert len(calls) == 0
General Comments 0
You need to be logged in to leave comments. Login now