Merge pull request #13120 from meeseeksmachine/auto-backport-of-pr-13090-on-7.x...
Matthias Bussonnier
r26759:06e21e83 merge
@@ -1,726 +1,750 @@
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 from codeop import compile_command
13 import ast
14 import sys
15 from codeop import CommandCompiler, Compile
14 16 import re
15 17 import tokenize
16 18 from typing import List, Tuple, Union
17 19 import warnings
18 20
19 21 _indent_re = re.compile(r'^[ \t]+')
20 22
21 23 def leading_empty_lines(lines):
22 24 """Remove leading empty lines
23 25
24 26 If the leading lines are empty or contain only whitespace, they will be
25 27 removed.
26 28 """
27 29 if not lines:
28 30 return lines
29 31 for i, line in enumerate(lines):
30 32 if line and not line.isspace():
31 33 return lines[i:]
32 34 return lines
33 35
34 36 def leading_indent(lines):
35 37 """Remove leading indentation.
36 38
37 39 If the first line starts with spaces or tabs, the same whitespace will be
38 40 removed from each following line in the cell.
39 41 """
40 42 if not lines:
41 43 return lines
42 44 m = _indent_re.match(lines[0])
43 45 if not m:
44 46 return lines
45 47 space = m.group(0)
46 48 n = len(space)
47 49 return [l[n:] if l.startswith(space) else l
48 50 for l in lines]
49 51
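# Illustrative usage (not part of this changeset): a minimal sketch of chaining
# the two cleanup passes above by hand, assuming this module is importable as
# IPython.core.inputtransformer2.
from IPython.core.inputtransformer2 import leading_empty_lines, leading_indent

cell = ["\n", "    x = 1\n", "    y = 2\n"]
print(leading_indent(leading_empty_lines(cell)))  # ['x = 1\n', 'y = 2\n']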
50 52 class PromptStripper:
51 53 """Remove matching input prompts from a block of input.
52 54
53 55 Parameters
54 56 ----------
55 57 prompt_re : regular expression
56 58 A regular expression matching any input prompt (including continuation,
57 59 e.g. ``...``)
58 60 initial_re : regular expression, optional
59 61 A regular expression matching only the initial prompt, but not continuation.
60 62 If no initial expression is given, prompt_re will be used everywhere.
61 63 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
62 64 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
63 65
64 66 Notes
65 67 -----
66 68
67 69 If initial_re and prompt_re differ,
68 70 only initial_re will be tested against the first line.
69 71 If any prompt is found on the first two lines,
70 72 prompts will be stripped from the rest of the block.
71 73 """
72 74 def __init__(self, prompt_re, initial_re=None):
73 75 self.prompt_re = prompt_re
74 76 self.initial_re = initial_re or prompt_re
75 77
76 78 def _strip(self, lines):
77 79 return [self.prompt_re.sub('', l, count=1) for l in lines]
78 80
79 81 def __call__(self, lines):
80 82 if not lines:
81 83 return lines
82 84 if self.initial_re.match(lines[0]) or \
83 85 (len(lines) > 1 and self.prompt_re.match(lines[1])):
84 86 return self._strip(lines)
85 87 return lines
86 88
87 89 classic_prompt = PromptStripper(
88 90 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
89 91 initial_re=re.compile(r'^>>>( |$)')
90 92 )
91 93
92 94 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
93 95
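# Illustrative sketch (not part of this changeset): classic_prompt strips
# ``>>>``/``...`` prompts from pasted doctest-style input, assuming the module
# is importable as IPython.core.inputtransformer2.
from IPython.core.inputtransformer2 import classic_prompt

pasted = [">>> a = 1\n", "... b = a + 1\n"]
print(classic_prompt(pasted))  # ['a = 1\n', 'b = a + 1\n']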
94 96 def cell_magic(lines):
95 97 if not lines or not lines[0].startswith('%%'):
96 98 return lines
97 99 if re.match(r'%%\w+\?', lines[0]):
98 100 # This case will be handled by help_end
99 101 return lines
100 102 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
101 103 body = ''.join(lines[1:])
102 104 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
103 105 % (magic_name, first_line, body)]
104 106
105 107
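# Illustrative sketch (not part of this changeset): cell_magic rewrites a
# ``%%`` cell into a single run_cell_magic() call.
from IPython.core.inputtransformer2 import cell_magic

print(cell_magic(["%%writefile out.txt\n", "hello\n"]))
# ["get_ipython().run_cell_magic('writefile', 'out.txt', 'hello\\n')\n"]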
106 108 def _find_assign_op(token_line) -> Union[int, None]:
107 109 """Get the index of the first assignment in the line ('=' not inside brackets)
108 110
109 111 Note: We don't try to support multiple special assignments (a = b = %foo)
110 112 """
111 113 paren_level = 0
112 114 for i, ti in enumerate(token_line):
113 115 s = ti.string
114 116 if s == '=' and paren_level == 0:
115 117 return i
116 118 if s in {'(','[','{'}:
117 119 paren_level += 1
118 120 elif s in {')', ']', '}'}:
119 121 if paren_level > 0:
120 122 paren_level -= 1
121 123
122 124 def find_end_of_continued_line(lines, start_line: int):
123 125 """Find the last line of a line explicitly extended using backslashes.
124 126
125 127 Uses 0-indexed line numbers.
126 128 """
127 129 end_line = start_line
128 130 while lines[end_line].endswith('\\\n'):
129 131 end_line += 1
130 132 if end_line >= len(lines):
131 133 break
132 134 return end_line
133 135
134 136 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
135 137 r"""Assemble a single line from multiple continued line pieces
136 138
137 139 Continued lines are lines ending in ``\``, and the line following the last
138 140 ``\`` in the block.
139 141
140 142 For example, this code continues over multiple lines::
141 143
142 144 if (assign_ix is not None) \
143 145 and (len(line) >= assign_ix + 2) \
144 146 and (line[assign_ix+1].string == '%') \
145 147 and (line[assign_ix+2].type == tokenize.NAME):
146 148
147 149 This statement contains four continued line pieces.
148 150 Assembling these pieces into a single line would give::
149 151
150 152 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
151 153
152 154 This uses 0-indexed line numbers. *start* is (lineno, colno).
153 155
154 156 Used to allow ``%magic`` and ``!system`` commands to be continued over
155 157 multiple lines.
156 158 """
157 159 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
158 160 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
159 161 + [parts[-1].rstrip()]) # Strip newline from last line
160 162
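# Illustrative sketch (not part of this changeset): reassembling a
# backslash-continued line. *start* is (line, column), both 0-indexed.
from IPython.core.inputtransformer2 import (
    assemble_continued_line,
    find_end_of_continued_line,
)

lines = ["%time x = 1 +\\\n", "2\n"]
end = find_end_of_continued_line(lines, 0)          # 1
print(assemble_continued_line(lines, (0, 0), end))  # %time x = 1 + 2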
161 163 class TokenTransformBase:
162 164 """Base class for transformations which examine tokens.
163 165
164 166 Special syntax should not be transformed when it occurs inside strings or
165 167 comments. This is hard to reliably avoid with regexes. The solution is to
166 168 tokenise the code as Python, and recognise the special syntax in the tokens.
167 169
168 170 IPython's special syntax is not valid Python syntax, so tokenising may go
169 171 wrong after the special syntax starts. These classes therefore find and
170 172 transform *one* instance of special syntax at a time into regular Python
171 173 syntax. After each transformation, tokens are regenerated to find the next
172 174 piece of special syntax.
173 175
174 176 Subclasses need to implement one class method (find)
175 177 and one regular method (transform).
176 178
177 179 The priority attribute can select which transformation to apply if multiple
178 180 transformers match in the same place. Lower numbers have higher priority.
179 181 This allows "%magic?" to be turned into a help call rather than a magic call.
180 182 """
181 183 # Lower numbers -> higher priority (for matches in the same location)
182 184 priority = 10
183 185
184 186 def sortby(self):
185 187 return self.start_line, self.start_col, self.priority
186 188
187 189 def __init__(self, start):
188 190 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
189 191 self.start_col = start[1]
190 192
191 193 @classmethod
192 194 def find(cls, tokens_by_line):
193 195 """Find one instance of special syntax in the provided tokens.
194 196
195 197 Tokens are grouped into logical lines for convenience,
196 198 so it is easy to e.g. look at the first token of each line.
197 199 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
198 200
199 201 This should return an instance of its class, pointing to the start
200 202 position it has found, or None if it found no match.
201 203 """
202 204 raise NotImplementedError
203 205
204 206 def transform(self, lines: List[str]):
205 207 """Transform one instance of special syntax found by ``find()``
206 208
207 209 Takes a list of strings representing physical lines,
208 210 returns a similar list of transformed lines.
209 211 """
210 212 raise NotImplementedError
211 213
212 214 class MagicAssign(TokenTransformBase):
213 215 """Transformer for assignments from magics (a = %foo)"""
214 216 @classmethod
215 217 def find(cls, tokens_by_line):
216 218 """Find the first magic assignment (a = %foo) in the cell.
217 219 """
218 220 for line in tokens_by_line:
219 221 assign_ix = _find_assign_op(line)
220 222 if (assign_ix is not None) \
221 223 and (len(line) >= assign_ix + 2) \
222 224 and (line[assign_ix+1].string == '%') \
223 225 and (line[assign_ix+2].type == tokenize.NAME):
224 226 return cls(line[assign_ix+1].start)
225 227
226 228 def transform(self, lines: List[str]):
227 229 """Transform a magic assignment found by the ``find()`` classmethod.
228 230 """
229 231 start_line, start_col = self.start_line, self.start_col
230 232 lhs = lines[start_line][:start_col]
231 233 end_line = find_end_of_continued_line(lines, start_line)
232 234 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
233 235 assert rhs.startswith('%'), rhs
234 236 magic_name, _, args = rhs[1:].partition(' ')
235 237
236 238 lines_before = lines[:start_line]
237 239 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
238 240 new_line = lhs + call + '\n'
239 241 lines_after = lines[end_line+1:]
240 242
241 243 return lines_before + [new_line] + lines_after
242 244
243 245
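# Illustrative sketch (not part of this changeset): MagicAssign in isolation,
# on the Python/tokenize behaviour this branch targets.
from IPython.core.inputtransformer2 import MagicAssign, make_tokens_by_line

lines = ["dirs = %sx ls -d */\n"]
tf = MagicAssign.find(make_tokens_by_line(lines))
print(tf.transform(lines))
# ["dirs = get_ipython().run_line_magic('sx', 'ls -d */')\n"]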
244 246 class SystemAssign(TokenTransformBase):
245 247 """Transformer for assignments from system commands (a = !foo)"""
246 248 @classmethod
247 249 def find(cls, tokens_by_line):
248 250 """Find the first system assignment (a = !foo) in the cell.
249 251 """
250 252 for line in tokens_by_line:
251 253 assign_ix = _find_assign_op(line)
252 254 if (assign_ix is not None) \
253 255 and not line[assign_ix].line.strip().startswith('=') \
254 256 and (len(line) >= assign_ix + 2) \
255 257 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
256 258 ix = assign_ix + 1
257 259
258 260 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
259 261 if line[ix].string == '!':
260 262 return cls(line[ix].start)
261 263 elif not line[ix].string.isspace():
262 264 break
263 265 ix += 1
264 266
265 267 def transform(self, lines: List[str]):
266 268 """Transform a system assignment found by the ``find()`` classmethod.
267 269 """
268 270 start_line, start_col = self.start_line, self.start_col
269 271
270 272 lhs = lines[start_line][:start_col]
271 273 end_line = find_end_of_continued_line(lines, start_line)
272 274 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
273 275 assert rhs.startswith('!'), rhs
274 276 cmd = rhs[1:]
275 277
276 278 lines_before = lines[:start_line]
277 279 call = "get_ipython().getoutput({!r})".format(cmd)
278 280 new_line = lhs + call + '\n'
279 281 lines_after = lines[end_line + 1:]
280 282
281 283 return lines_before + [new_line] + lines_after
282 284
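# Illustrative sketch (not part of this changeset): SystemAssign turns an
# assignment from ``!`` into a getoutput() call.
from IPython.core.inputtransformer2 import SystemAssign, make_tokens_by_line

lines = ["files = !ls\n"]
tf = SystemAssign.find(make_tokens_by_line(lines))
print(tf.transform(lines))  # ["files = get_ipython().getoutput('ls')\n"]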
283 285 # The escape sequences that define the syntax transformations IPython will
284 286 # apply to user input. These can NOT be just changed here: many regular
285 287 # expressions and other parts of the code may use their hardcoded values, and
286 288 # for all intents and purposes they constitute the 'IPython syntax', so they
287 289 # should be considered fixed.
288 290
289 291 ESC_SHELL = '!' # Send line to underlying system shell
290 292 ESC_SH_CAP = '!!' # Send line to system shell and capture output
291 293 ESC_HELP = '?' # Find information about object
292 294 ESC_HELP2 = '??' # Find extra-detailed information about object
293 295 ESC_MAGIC = '%' # Call magic function
294 296 ESC_MAGIC2 = '%%' # Call cell-magic function
295 297 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
296 298 ESC_QUOTE2 = ';' # Quote all args as a single string, call
297 299 ESC_PAREN = '/' # Call first argument with rest of line as arguments
298 300
299 301 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
300 302 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
301 303
302 304 def _make_help_call(target, esc, next_input=None):
303 305 """Prepares a pinfo(2)/psearch call from a target name and the escape
304 306 (i.e. ? or ??)"""
305 307 method = 'pinfo2' if esc == '??' \
306 308 else 'psearch' if '*' in target \
307 309 else 'pinfo'
308 310 arg = " ".join([method, target])
309 311 # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
310 312 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
311 313 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
312 314 if next_input is None:
313 315 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
314 316 else:
315 317 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
316 318 (next_input, t_magic_name, t_magic_arg_s)
317 319
318 320 def _tr_help(content):
319 321 """Translate lines escaped with: ?
320 322
321 323 A naked help line should fire the intro help screen (shell.show_usage())
322 324 """
323 325 if not content:
324 326 return 'get_ipython().show_usage()'
325 327
326 328 return _make_help_call(content, '?')
327 329
328 330 def _tr_help2(content):
329 331 """Translate lines escaped with: ??
330 332
331 333 A naked help line should fire the intro help screen (shell.show_usage())
332 334 """
333 335 if not content:
334 336 return 'get_ipython().show_usage()'
335 337
336 338 return _make_help_call(content, '??')
337 339
338 340 def _tr_magic(content):
339 341 "Translate lines escaped with a percent sign: %"
340 342 name, _, args = content.partition(' ')
341 343 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
342 344
343 345 def _tr_quote(content):
344 346 "Translate lines escaped with a comma: ,"
345 347 name, _, args = content.partition(' ')
346 348 return '%s("%s")' % (name, '", "'.join(args.split()) )
347 349
348 350 def _tr_quote2(content):
349 351 "Translate lines escaped with a semicolon: ;"
350 352 name, _, args = content.partition(' ')
351 353 return '%s("%s")' % (name, args)
352 354
353 355 def _tr_paren(content):
354 356 "Translate lines escaped with a slash: /"
355 357 name, _, args = content.partition(' ')
356 358 return '%s(%s)' % (name, ", ".join(args.split()))
357 359
358 360 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
359 361 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
360 362 ESC_HELP : _tr_help,
361 363 ESC_HELP2 : _tr_help2,
362 364 ESC_MAGIC : _tr_magic,
363 365 ESC_QUOTE : _tr_quote,
364 366 ESC_QUOTE2 : _tr_quote2,
365 367 ESC_PAREN : _tr_paren }
366 368
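# Illustrative sketch (not part of this changeset): the ``tr`` table maps each
# escape character to a translator taking the rest of the line.
from IPython.core.inputtransformer2 import tr

print(tr['!']('ls -l'))     # get_ipython().system('ls -l')
print(tr['%']('time x=1'))  # get_ipython().run_line_magic('time', 'x=1')
print(tr[',']('f a b c'))   # f("a", "b", "c")
print(tr['/']('f a b'))     # f(a, b)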
367 369 class EscapedCommand(TokenTransformBase):
368 370 """Transformer for escaped commands like %foo, !foo, or /foo"""
369 371 @classmethod
370 372 def find(cls, tokens_by_line):
371 373 """Find the first escaped command (%foo, !foo, etc.) in the cell.
372 374 """
373 375 for line in tokens_by_line:
374 376 if not line:
375 377 continue
376 378 ix = 0
377 379 ll = len(line)
378 380 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
379 381 ix += 1
380 382 if ix >= ll:
381 383 continue
382 384 if line[ix].string in ESCAPE_SINGLES:
383 385 return cls(line[ix].start)
384 386
385 387 def transform(self, lines):
386 388 """Transform an escaped line found by the ``find()`` classmethod.
387 389 """
388 390 start_line, start_col = self.start_line, self.start_col
389 391
390 392 indent = lines[start_line][:start_col]
391 393 end_line = find_end_of_continued_line(lines, start_line)
392 394 line = assemble_continued_line(lines, (start_line, start_col), end_line)
393 395
394 396 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
395 397 escape, content = line[:2], line[2:]
396 398 else:
397 399 escape, content = line[:1], line[1:]
398 400
399 401 if escape in tr:
400 402 call = tr[escape](content)
401 403 else:
402 404 call = ''
403 405
404 406 lines_before = lines[:start_line]
405 407 new_line = indent + call + '\n'
406 408 lines_after = lines[end_line + 1:]
407 409
408 410 return lines_before + [new_line] + lines_after
409 411
410 412 _help_end_re = re.compile(r"""(%{0,2}
411 413 (?!\d)[\w*]+ # Variable name
412 414 (\.(?!\d)[\w*]+)* # .etc.etc
413 415 )
414 416 (\?\??)$ # ? or ??
415 417 """,
416 418 re.VERBOSE)
417 419
418 420 class HelpEnd(TokenTransformBase):
419 421 """Transformer for help syntax: obj? and obj??"""
420 422 # This needs to be higher priority (lower number) than EscapedCommand so
421 423 # that inspecting magics (%foo?) works.
422 424 priority = 5
423 425
424 426 def __init__(self, start, q_locn):
425 427 super().__init__(start)
426 428 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
427 429 self.q_col = q_locn[1]
428 430
429 431 @classmethod
430 432 def find(cls, tokens_by_line):
431 433 """Find the first help command (foo?) in the cell.
432 434 """
433 435 for line in tokens_by_line:
434 436 # Last token is NEWLINE; look at last but one
435 437 if len(line) > 2 and line[-2].string == '?':
436 438 # Find the first token that's not INDENT/DEDENT
437 439 ix = 0
438 440 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
439 441 ix += 1
440 442 return cls(line[ix].start, line[-2].start)
441 443
442 444 def transform(self, lines):
443 445 """Transform a help command found by the ``find()`` classmethod.
444 446 """
445 447 piece = ''.join(lines[self.start_line:self.q_line+1])
446 448 indent, content = piece[:self.start_col], piece[self.start_col:]
447 449 lines_before = lines[:self.start_line]
448 450 lines_after = lines[self.q_line + 1:]
449 451
450 452 m = _help_end_re.search(content)
451 453 if not m:
452 454 raise SyntaxError(content)
453 455 assert m is not None, content
454 456 target = m.group(1)
455 457 esc = m.group(3)
456 458
457 459 # If we're mid-command, put it back on the next prompt for the user.
458 460 next_input = None
459 461 if (not lines_before) and (not lines_after) \
460 462 and content.strip() != m.group(0):
461 463 next_input = content.rstrip('?\n')
462 464
463 465 call = _make_help_call(target, esc, next_input=next_input)
464 466 new_line = indent + call + '\n'
465 467
466 468 return lines_before + [new_line] + lines_after
467 469
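# Illustrative sketch (not part of this changeset): HelpEnd handles trailing
# ``?``/``??`` and routes them to the pinfo/pinfo2 magics.
from IPython.core.inputtransformer2 import HelpEnd, make_tokens_by_line

lines = ["math.pi??\n"]
tf = HelpEnd.find(make_tokens_by_line(lines))
print(tf.transform(lines))
# ["get_ipython().run_line_magic('pinfo2', 'math.pi')\n"]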
468 470 def make_tokens_by_line(lines:List[str]):
469 471 """Tokenize a series of lines and group tokens by line.
470 472
471 473 The tokens for a multiline Python string or expression are grouped as one
472 474 line. All lines except the last should keep their line ending ('\\n',
473 475 '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`,
474 476 for example, when passing a block of text to this function.
475 477
476 478 """
477 479 # NL tokens are used inside multiline expressions, but also after blank
478 480 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
479 481 # We want to group the former case together but split the latter, so we
480 482 # track parentheses level, similar to the internals of tokenize.
481 483 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
482 484 tokens_by_line = [[]]
483 485 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
484 486 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
485 487 parenlev = 0
486 488 try:
487 489 for token in tokenize.generate_tokens(iter(lines).__next__):
488 490 tokens_by_line[-1].append(token)
489 491 if (token.type == NEWLINE) \
490 492 or ((token.type == NL) and (parenlev <= 0)):
491 493 tokens_by_line.append([])
492 494 elif token.string in {'(', '[', '{'}:
493 495 parenlev += 1
494 496 elif token.string in {')', ']', '}'}:
495 497 if parenlev > 0:
496 498 parenlev -= 1
497 499 except tokenize.TokenError:
498 500 # Input ended in a multiline string or expression. That's OK for us.
499 501 pass
500 502
501 503
502 504 if not tokens_by_line[-1]:
503 505 tokens_by_line.pop()
504 506
505 507
506 508 return tokens_by_line
507 509
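# Illustrative sketch (not part of this changeset): tokens spanning a bracketed
# expression are grouped into one logical line; the trailing group holds the
# ENDMARKER token.
from IPython.core.inputtransformer2 import make_tokens_by_line

src = ["values = [1,\n", "          2]\n", "print(values)\n"]
groups = make_tokens_by_line(src)
print([g[0].string for g in groups])  # e.g. ['values', 'print', '']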
508 510 def show_linewise_tokens(s: str):
509 511 """For investigation and debugging"""
510 512 if not s.endswith('\n'):
511 513 s += '\n'
512 514 lines = s.splitlines(keepends=True)
513 515 for line in make_tokens_by_line(lines):
514 516 print("Line -------")
515 517 for tokinfo in line:
516 518 print(" ", tokinfo)
517 519
518 520 # Arbitrary limit to prevent getting stuck in infinite loops
519 521 TRANSFORM_LOOP_LIMIT = 500
520 522
521 523 class TransformerManager:
522 524 """Applies various transformations to a cell or code block.
523 525
524 526 The key methods for external use are ``transform_cell()``
525 527 and ``check_complete()``.
526 528 """
527 529 def __init__(self):
528 530 self.cleanup_transforms = [
529 531 leading_empty_lines,
530 532 leading_indent,
531 533 classic_prompt,
532 534 ipython_prompt,
533 535 ]
534 536 self.line_transforms = [
535 537 cell_magic,
536 538 ]
537 539 self.token_transformers = [
538 540 MagicAssign,
539 541 SystemAssign,
540 542 EscapedCommand,
541 543 HelpEnd,
542 544 ]
543 545
544 546 def do_one_token_transform(self, lines):
545 547 """Find and run the transform earliest in the code.
546 548
547 549 Returns (changed, lines).
548 550
549 551 This method is called repeatedly until changed is False, indicating
550 552 that all available transformations are complete.
551 553
552 554 The tokens following IPython special syntax might not be valid, so
553 555 the transformed code is retokenised every time to identify the next
554 556 piece of special syntax. Hopefully long code cells are mostly valid
555 557 Python, not using lots of IPython special syntax, so this shouldn't be
556 558 a performance issue.
557 559 """
558 560 tokens_by_line = make_tokens_by_line(lines)
559 561 candidates = []
560 562 for transformer_cls in self.token_transformers:
561 563 transformer = transformer_cls.find(tokens_by_line)
562 564 if transformer:
563 565 candidates.append(transformer)
564 566
565 567 if not candidates:
566 568 # Nothing to transform
567 569 return False, lines
568 570 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
569 571 for transformer in ordered_transformers:
570 572 try:
571 573 return True, transformer.transform(lines)
572 574 except SyntaxError:
573 575 pass
574 576 return False, lines
575 577
576 578 def do_token_transforms(self, lines):
577 579 for _ in range(TRANSFORM_LOOP_LIMIT):
578 580 changed, lines = self.do_one_token_transform(lines)
579 581 if not changed:
580 582 return lines
581 583
582 584 raise RuntimeError("Input transformation still changing after "
583 585 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
584 586
585 587 def transform_cell(self, cell: str) -> str:
586 588 """Transforms a cell of input code"""
587 589 if not cell.endswith('\n'):
588 590 cell += '\n' # Ensure the cell has a trailing newline
589 591 lines = cell.splitlines(keepends=True)
590 592 for transform in self.cleanup_transforms + self.line_transforms:
591 593 lines = transform(lines)
592 594
593 595 lines = self.do_token_transforms(lines)
594 596 return ''.join(lines)
595 597
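# Illustrative sketch (not part of this changeset): transform_cell() is the
# main entry point, applying cleanup, line and token transforms in order.
from IPython.core.inputtransformer2 import TransformerManager

tm = TransformerManager()
print(tm.transform_cell("a = %env\nfiles = !ls\n"), end="")
# a = get_ipython().run_line_magic('env', '')
# files = get_ipython().getoutput('ls')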
596 598 def check_complete(self, cell: str):
597 599 """Return whether a block of code is ready to execute, or should be continued
598 600
599 601 Parameters
600 602 ----------
601 603 cell : str
602 604 Python input code, which can be multiline.
603 605
604 606 Returns
605 607 -------
606 608 status : str
607 609 One of 'complete', 'incomplete', or 'invalid' if source is not a
608 610 prefix of valid code.
609 611 indent_spaces : int or None
610 612 The number of spaces by which to indent the next line of code. If
611 613 status is not 'incomplete', this is None.
612 614 """
613 615 # Remember whether the cell ends in a newline.
614 616 ends_with_newline = False
615 617 for character in reversed(cell):
616 618 if character == '\n':
617 619 ends_with_newline = True
618 620 break
619 621 elif character.strip():
620 622 break
621 623 else:
622 624 continue
623 625
624 626 if not ends_with_newline:
625 627 # Append a newline for consistent tokenization
626 628 # See https://bugs.python.org/issue33899
627 629 cell += '\n'
628 630
629 631 lines = cell.splitlines(keepends=True)
630 632
631 633 if not lines:
632 634 return 'complete', None
633 635
634 636 if lines[-1].endswith('\\'):
635 637 # Explicit backslash continuation
636 638 return 'incomplete', find_last_indent(lines)
637 639
638 640 try:
639 641 for transform in self.cleanup_transforms:
640 642 if not getattr(transform, 'has_side_effects', False):
641 643 lines = transform(lines)
642 644 except SyntaxError:
643 645 return 'invalid', None
644 646
645 647 if lines[0].startswith('%%'):
646 648 # Special case for cell magics - completion marked by blank line
647 649 if lines[-1].strip():
648 650 return 'incomplete', find_last_indent(lines)
649 651 else:
650 652 return 'complete', None
651 653
652 654 try:
653 655 for transform in self.line_transforms:
654 656 if not getattr(transform, 'has_side_effects', False):
655 657 lines = transform(lines)
656 658 lines = self.do_token_transforms(lines)
657 659 except SyntaxError:
658 660 return 'invalid', None
659 661
660 662 tokens_by_line = make_tokens_by_line(lines)
661 663
662 664 if not tokens_by_line:
663 665 return 'incomplete', find_last_indent(lines)
664 666
665 667 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
666 668 # We're in a multiline string or expression
667 669 return 'incomplete', find_last_indent(lines)
668 670
669 671 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}
670 672
671 673 # Pop the last line which only contains DEDENTs and ENDMARKER
672 674 last_token_line = None
673 675 if {t.type for t in tokens_by_line[-1]} in [
674 676 {tokenize.DEDENT, tokenize.ENDMARKER},
675 677 {tokenize.ENDMARKER}
676 678 ] and len(tokens_by_line) > 1:
677 679 last_token_line = tokens_by_line.pop()
678 680
679 681 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
680 682 tokens_by_line[-1].pop()
681 683
682 684 if not tokens_by_line[-1]:
683 685 return 'incomplete', find_last_indent(lines)
684 686
685 687 if tokens_by_line[-1][-1].string == ':':
686 688 # The last line starts a block (e.g. 'if foo:')
687 689 ix = 0
688 690 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
689 691 ix += 1
690 692
691 693 indent = tokens_by_line[-1][ix].start[1]
692 694 return 'incomplete', indent + 4
693 695
694 696 if tokens_by_line[-1][0].line.endswith('\\'):
695 697 return 'incomplete', None
696 698
697 699 # At this point, our checks think the code is complete (or invalid).
698 700 # We'll use codeop.compile_command to check this with the real parser
699 701 try:
700 702 with warnings.catch_warnings():
701 703 warnings.simplefilter('error', SyntaxWarning)
702 704 res = compile_command(''.join(lines), symbol='exec')
703 705 except (SyntaxError, OverflowError, ValueError, TypeError,
704 706 MemoryError, SyntaxWarning):
705 707 return 'invalid', None
706 708 else:
707 709 if res is None:
708 710 return 'incomplete', find_last_indent(lines)
709 711
710 712 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
711 713 if ends_with_newline:
712 714 return 'complete', None
713 715 return 'incomplete', find_last_indent(lines)
714 716
715 717 # If there's a blank line at the end, assume we're ready to execute
716 718 if not lines[-1].strip():
717 719 return 'complete', None
718 720
719 721 return 'complete', None
720 722
721 723
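# Illustrative sketch (not part of this changeset): check_complete() reports
# whether a block can run as-is, and how far to indent a continuation.
from IPython.core.inputtransformer2 import TransformerManager

tm = TransformerManager()
print(tm.check_complete("a = 1"))               # ('complete', None)
print(tm.check_complete("for i in range(3):"))  # ('incomplete', 4)
print(tm.check_complete("raise = 2"))           # ('invalid', None)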
722 724 def find_last_indent(lines):
723 725 m = _indent_re.match(lines[-1])
724 726 if not m:
725 727 return 0
726 728 return len(m.group(0).replace('\t', ' '*4))
729
730
731 class MaybeAsyncCompile(Compile):
732 def __init__(self, extra_flags=0):
733 super().__init__()
734 self.flags |= extra_flags
735
736 def __call__(self, *args, **kwds):
737 return compile(*args, **kwds)
738
739
740 class MaybeAsyncCommandCompiler(CommandCompiler):
741 def __init__(self, extra_flags=0):
742 self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
743
744
745 if (sys.version_info.major, sys.version_info.minor) >= (3, 8):
746 _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
747 else:
748 _extra_flags = ast.PyCF_ONLY_AST
749
750 compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
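# Illustrative sketch (not part of this changeset): like codeop.compile_command,
# this returns None for input that looks incomplete and a compiled result
# otherwise; on Python 3.8+ the extra flag also lets cells containing top-level
# ``await`` compile.
from IPython.core.inputtransformer2 import compile_command

print(compile_command("x = 1\n", symbol="exec") is None)     # False: complete
print(compile_command("if True:\n", symbol="exec") is None)  # True: needs more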