##// END OF EJS Templates
Merge pull request #12503 from Carreau/fix-doc
Matthias Bussonnier -
r25961:a88c08fb merge
parent child Browse files
Show More
@@ -1,726 +1,729 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Optional, Any
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Drop blank lines from the start of a cell.

    Lines that are empty or made up solely of whitespace are removed from
    the front of *lines*. If there is no line with real content (including
    the empty-list case), *lines* is handed back unchanged.
    """
    first_content = next(
        (idx for idx, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_content is None:
        # Nothing but blanks (or nothing at all): leave the input alone.
        return lines
    return lines[first_content:]
33 33
def leading_indent(lines):
    """Remove the first line's indentation from the whole cell.

    If the first line starts with spaces or tabs, that exact run of
    whitespace is removed from every line that begins with it. Lines that do
    not start with the same whitespace are left as they are.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if match is None:
        # First line is not indented, so there is nothing to strip.
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
49 49
class PromptStripper:
    """Strip interactive-prompt prefixes from a block of input lines.

    Parameters
    ----------
    prompt_re : regular expression
        Compiled pattern matching any input prompt, continuation prompts
        (e.g. ``...``) included.
    initial_re : regular expression, optional
        Compiled pattern matching only the initial prompt, not the
        continuation one. When omitted, *prompt_re* is used everywhere.
        Mainly useful for plain Python prompts (``>>>``), where the
        continuation prompt ``...`` is a valid Python expression in Python 3
        and therefore must not be stripped on its own.

    Notes
    -----

    When *initial_re* and *prompt_re* differ, only *initial_re* is tested
    against the first line. If a prompt is found on either of the first two
    lines, prompts are stripped from every line of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        remove_prompt = self.prompt_re.sub
        return [remove_prompt('', text, count=1) for text in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        first_has_prompt = self.initial_re.match(lines[0])
        second_has_prompt = len(lines) > 1 and self.prompt_re.match(lines[1])
        if first_has_prompt or second_has_prompt:
            return self._strip(lines)
        return lines
83 86
# Plain Python prompts: ``>>>`` starts a block and ``...`` continues it. The
# continuation prompt alone must not trigger stripping, hence the separate
# initial pattern.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` and the ``...: `` continuation prompt.
ipython_prompt = PromptStripper(
    re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
90 93
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    The first line supplies the magic name and its argument string; the
    remaining lines are passed along verbatim as the cell body. Input that
    does not start with ``%%`` is returned unchanged, as is ``%%magic?``,
    which is left alone here (upstream comment: handled by help_end).
    """
    if not lines:
        return lines
    header = lines[0]
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = ('get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body))
    return [call]
101 104
102 105
def _find_assign_op(token_line) -> Optional[int]:
    """Return the index of the first ``=`` token that is not inside brackets.

    *token_line* is a sequence of objects with a ``string`` attribute (e.g.
    ``tokenize.TokenInfo``). Returns None when no top-level ``=`` exists.

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    depth = 0
    for position, tok in enumerate(token_line):
        text = tok.string
        if text == '=' and depth == 0:
            return position
        if text in openers:
            depth += 1
        elif text in closers and depth > 0:
            # Never let unbalanced closers push the depth below zero.
            depth -= 1
    return None
119 122
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Starting at *start_line*, follows lines ending in a backslash-newline
    and returns the index of the line that terminates the continuation. If
    the final physical line itself ends in a backslash, its own index is
    returned, so for a valid *start_line* the result is always a valid index
    into *lines* (rather than ``len(lines)``).
    """
    last_index = len(lines) - 1
    end_line = start_line
    # Stop at the last physical line even if it still asks for continuation.
    while end_line < last_index and lines[end_line].endswith('\\\n'):
        end_line += 1
    return end_line
131 134
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued line into one string.

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno); text before
    *colno* on the first line is not included. *end_line* is the index of
    the last piece.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_line, first_col = start[0], start[1]
    pieces = [lines[first_line][first_col:]] + lines[first_line + 1:end_line + 1]
    *continued, final = pieces
    # Every piece but the last loses trailing whitespace and then the
    # backslash itself; the last piece only loses its trailing whitespace.
    cleaned = [piece.rstrip()[:-1] for piece in continued]
    cleaned.append(final.rstrip())
    return ' '.join(cleaned)
158 161
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax must not be transformed when it appears inside strings or
    comments, which is hard to guarantee with regexes alone. Instead the code
    is tokenised as Python and the special syntax is recognised in the tokens.

    Because IPython's special syntax is not valid Python, tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax; tokens are regenerated after each transformation to locate the
    next piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute selects which transformation applies when several
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        """Sort key: earliest position first, then priority."""
        return (self.start_line, self.start_col, self.priority)

    def __init__(self, start):
        # *start* is a (line, column) pair as found on tokenize tokens, whose
        # line numbers are 1-indexed; store a 0-indexed line number.
        row, col = start[0], start[1]
        self.start_line = row - 1
        self.start_col = col

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
209 212
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None when no
        line has a top-level ``=`` directly followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected below, so the line has
            # to hold more than assign_ix + 2 tokens for both to exist.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text before the ``%`` is kept as the left-hand side; the magic
        (including any backslash-continued lines) is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call on a single line.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
240 243
241 244
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance pointing at the ``!`` token, or None if no line
        qualifies.
        """
        for tokens in tokens_by_line:
            eq_pos = _find_assign_op(tokens)
            if eq_pos is None:
                continue
            if tokens[eq_pos].line.strip().startswith('='):
                # The line itself begins with '=': not an assignment target.
                continue
            if len(tokens) < eq_pos + 2:
                continue
            if tokens[eq_pos + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens after '=': whitespace may precede
            # the '!', anything else ends the search on this line.
            for tok in tokens[eq_pos + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The command (with continuation lines folded in) is replaced by a
        ``get_ipython().getoutput(cmd)`` call; text before ``!`` is kept.
        """
        first, col = self.start_line, self.start_col
        last = find_end_of_continued_line(lines, first)

        target = lines[first][:col]
        command = assemble_continued_line(lines, (first, col), last)
        assert command.startswith('!'), command

        replacement = target + "get_ipython().getoutput({!r})".format(command[1:]) + '\n'
        return lines[:first] + [replacement] + lines[last + 1:]
280 283
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'      # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP = '?'       # Find information about object
ESC_HELP2 = '??'     # Find extra-detailed information about object
ESC_MAGIC = '%'      # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE = ','      # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN = '/'      # Call first argument with rest of line as arguments

# One-character escapes recognised at the start of a line.
ESCAPE_SINGLES = {
    ESC_SHELL,
    ESC_HELP,
    ESC_MAGIC,
    ESC_QUOTE,
    ESC_QUOTE2,
    ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
299 302
def _make_help_call(target, esc, next_input=None):
    """Build the source for a pinfo/pinfo2/psearch line-magic call.

    Parameters
    ----------
    target : str
        The object name (or wildcard pattern) to inspect.
    esc : str
        The help escape that was used, ``?`` or ``??``.
    next_input : str, optional
        If given, the generated code first calls ``set_next_input`` with
        this text before running the help magic.

    Returns
    -------
    str
        Python source calling ``get_ipython().run_line_magic``. ``??``
        selects ``pinfo2``; otherwise a ``*`` in *target* selects
        ``psearch``; anything else uses ``pinfo``.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    # The method name is the magic name and the target is its argument
    # string; no joining and re-splitting is needed to get there.
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (method, target)
    return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
        (next_input, method, target)
315 318
def _tr_help(content):
    """Translate lines escaped with: ?

    With nothing after the escape, the intro help screen is requested
    (``get_ipython().show_usage()``); otherwise a help call for *content*
    is generated.
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
325 328
def _tr_help2(content):
    """Translate lines escaped with: ??

    With nothing after the escape, the intro help screen is requested
    (``get_ipython().show_usage()``); otherwise a detailed help call for
    *content* is generated.
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
335 338
def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    # Everything up to the first space is the magic name; the rest is passed
    # through untouched as its argument string.
    pieces = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (pieces[0], pieces[2])
340 343
def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    # The first word is the callable; each remaining whitespace-separated
    # word becomes its own double-quoted argument.
    callee, _, rest = content.partition(' ')
    quoted_args = '", "'.join(rest.split())
    return '%s("%s")' % (callee, quoted_args)
345 348
def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    # The first word is the callable; the whole remainder is passed as one
    # double-quoted argument.
    pieces = content.partition(' ')
    return '%s("%s")' % (pieces[0], pieces[2])
350 353
def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    # The first word is the callable; the remaining whitespace-separated
    # words become its comma-separated arguments, unquoted.
    callee, _, rest = content.partition(' ')
    arguments = ", ".join(rest.split())
    return '%s(%s)' % (callee, arguments)
355 358
# Dispatch table: escape sequence -> callable that turns the text following
# the escape into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
364 367
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        A line qualifies when its first token other than INDENT/DEDENT is
        one of the single-character escapes. Returns None if none does.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            first_real = next(
                (tok for tok in tokens if tok.type not in layout_types),
                None,
            )
            if first_real is not None and first_real.string in ESCAPE_SINGLES:
                return cls(first_real.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command (with continuation lines folded in) is replaced
        by the Python call produced by the matching entry in ``tr``; the
        original indentation is preserved.
        """
        first, col = self.start_line, self.start_col
        indent = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        command = assemble_continued_line(lines, (first, col), last)

        # Two-character escapes (!!, ??) take precedence over their
        # one-character prefixes.
        width = 2 if (len(command) > 1 and command[:2] in ESCAPE_DOUBLES) else 1
        escape, content = command[:width], command[width:]

        handler = tr.get(escape)
        call = handler(content) if handler is not None else ''

        return lines[:first] + [indent + call + '\n'] + lines[last + 1:]
407 410
# Matches a trailing help request: an optional % / %% prefix, a dotted name
# (which may contain * wildcards), then ? or ?? at the end of the text.
# Group 1 is the target, group 3 the escape.
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+            # Variable name
                              (\.(?!\d)[\w*]+)*       # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                          re.VERBOSE)
415 418
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the question mark; as with
        # *start*, convert the 1-indexed line number to 0-indexed.
        q_row, q_col = q_locn[0], q_locn[1]
        self.q_line = q_row - 1
        self.q_col = q_col

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        Returns None when no line ends in ``?``.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(tokens) <= 2 or tokens[-2].string != '?':
                continue
            # The start position is the first token that's not INDENT/DEDENT
            first_real = next(tok for tok in tokens if tok.type not in layout_types)
            return cls(first_real.start, tokens[-2].start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text up to the question mark does not
        end in something the help pattern recognises.
        """
        before = lines[:self.start_line]
        after = lines[self.q_line + 1:]
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]

        match = _help_end_re.search(content)
        if match is None:
            raise SyntaxError(content)
        target, esc = match.group(1), match.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_only_statement = (not before) and (not after)
        if is_only_statement and content.strip() != match.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return before + [indent + call + '\n'] + after
465 468
def make_tokens_by_line(lines:List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. Every line except possibly the last should keep its line ending
    ('\\n', '\\r\\n') for this to work properly; use
    ``str.splitlines(keepends=True)`` when preparing a block of text for this
    function. A warning is issued if the first of several lines has no line
    ending.

    Tokenizing stops quietly if the input ends inside a multiline string or
    expression; the tokens produced up to that point are returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}

    grouped: List[List[Any]] = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")

    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            grouped[-1].append(tok)
            kind = tok.type
            if kind == NEWLINE or (kind == NL and depth <= 0):
                # End of a logical line (or a blank/comment line outside
                # brackets): start collecting the next group.
                grouped.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the trailing group if nothing was collected into it.
    if not grouped[-1]:
        grouped.pop()

    return grouped
507 510
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s*, grouped into logical lines as
    ``make_tokens_by_line`` sees them. A trailing newline is added to *s*
    if it does not already have one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
517 520
# Arbitrary cap on the number of token-transformation passes, so that a set
# of transformers which never settles cannot loop forever.
TRANSFORM_LOOP_LIMIT = 500
520 523
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Applied first, to tidy up pasted input (blank lines, indentation,
        # prompts).
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Applied next, on the list of physical lines.
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transformer classes, applied repeatedly until no more
        # special syntax is found.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not handle what it found; fall
                # through to the next one in order.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transformations until nothing changes.

        Raises RuntimeError if the input is still changing after
        ``TRANSFORM_LOOP_LIMIT`` passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if cell is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None. It can also be None for
          an 'incomplete' status when no indentation is suggested.
        """
        # Remember if the lines ends in a new line (ignoring any whitespace
        # that follows that newline).
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            # The code ends inside an indented block: only a trailing newline
            # signals that the block is finished.
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # Whether or not the input ends with a blank line, the code compiled
        # and is ready to execute.
        return 'complete', None
720 723
721 724
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    # The matched text holds only spaces and tabs; a tab counts as four
    # columns, a space as one.
    return sum(4 if ch == '\t' else 1 for ch in match.group(0))
General Comments 0
You need to be logged in to leave comments. Login now