##// END OF EJS Templates
Backport PR #12503: try to fix doc build
Matthias Bussonnier -
Show More
@@ -1,723 +1,726 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Union
17 17 import warnings
18 18
# Matches the run of spaces and/or tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Drop blank lines from the start of a cell.

    Lines that are empty or whitespace-only are skipped until the first line
    with real content; everything from that line on is returned. If no line
    has content (or the list is empty), *lines* is returned unchanged.
    """
    first_content = next(
        (idx for idx, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_content is None:
        return lines
    return lines[first_content:]
33 33
def leading_indent(lines):
    """Strip the first line's indentation from every line of the cell.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from each line that begins with it. Lines that do not
    start with the prefix are left untouched.
    """
    if not lines:
        return lines

    head = lines[0]
    width = len(head) - len(head.lstrip(' \t'))
    if width == 0:
        # First line is not indented: nothing to remove.
        return lines

    prefix = head[:width]
    dedented = []
    for text in lines:
        if text.startswith(prefix):
            dedented.append(text[width:])
        else:
            dedented.append(text)
    return dedented
49 49
class PromptStripper:
    """Strip interactive input prompts from the front of pasted lines.

    Parameters
    ----------
    prompt_re : regular expression
        Compiled pattern matching any input prompt, continuation prompts
        (e.g. ``...``) included.
    initial_re : regular expression, optional
        Compiled pattern matching only the initial prompt, not the
        continuation prompt. When omitted, *prompt_re* is used everywhere.
        Mainly useful for plain Python prompts (``>>>``), because the
        continuation prompt ``...`` is a valid Python 3 expression and so
        should not be stripped on its own.

    Notes
    -----

    When *initial_re* and *prompt_re* differ, only *initial_re* is tried on
    the first line. If a prompt is detected on either of the first two lines,
    prompts are stripped from every line of the block.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re else prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        pattern = self.prompt_re
        return [pattern.sub('', text, count=1) for text in lines]

    def __call__(self, lines):
        """Return *lines* with prompts removed, or unchanged if none found."""
        if not lines:
            return lines

        prompted = bool(self.initial_re.match(lines[0]))
        if not prompted and len(lines) > 1:
            prompted = bool(self.prompt_re.match(lines[1]))

        return self._strip(lines) if prompted else lines
83 86
# Plain Python prompts: ``>>>`` opens a block, ``...`` continues it. Only
# ``>>>`` is accepted as the initial prompt.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` and the ``...: `` continuation prompt.
ipython_prompt = PromptStripper(
    re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
90 93
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a single ``run_cell_magic`` call.

    Cells that are empty, do not start with ``%%``, or are a help request on
    a cell magic (``%%name?``, handled by the help transformer) are returned
    unchanged. Otherwise a one-element list is returned whose line calls
    ``get_ipython().run_cell_magic(name, first_line_args, body)``.
    """
    if not lines:
        return lines

    header = lines[0]
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines

    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = 'get_ipython().run_cell_magic({!r}, {!r}, {!r})\n'.format(
        magic_name, first_line, body)
    return [call]
101 104
102 105
def _find_assign_op(token_line) -> Union[int, None]:
    """Return the index of the first top-level ``=`` token, or None.

    An ``=`` counts only when it is not nested inside ``()``, ``[]`` or
    ``{}``. Unbalanced closing brackets never push the depth below zero.

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    openers = ('(', '[', '{')
    closers = (')', ']', '}')
    depth = 0
    for position, token in enumerate(token_line):
        text = token.string
        if depth == 0 and text == '=':
            return position
        if text in openers:
            depth += 1
        elif text in closers:
            depth = max(depth - 1, 0)
    return None
118 121
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Starting at *start_line* (0-indexed), advance while the current line ends
    with a backslash followed by a newline. Returns the index of the first
    line that does not; if every remaining line is continued, the returned
    index is ``len(lines)``.
    """
    last_index = len(lines) - 1
    cursor = start_line
    while lines[cursor].endswith('\\\n'):
        if cursor >= last_index:
            # Ran out of lines while still inside a continuation.
            return cursor + 1
        cursor += 1
    return cursor
130 133
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued line into one string.

    A continued line consists of every line ending in ``\`` plus the line
    that follows the last ``\``. For example, this code continues over
    multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    It is made of four pieces, which assemble into::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    Line numbers are 0-indexed. *start* is ``(lineno, colno)``: the first
    piece is taken from column *colno* of line *lineno*; *end_line* is the
    index of the last piece. Pieces are right-stripped, every piece but the
    last also loses its final character (the backslash), and the results are
    joined with single spaces.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    row = start[0]
    col = start[1]
    pieces = [lines[row][col:]]
    pieces.extend(lines[row + 1:end_line + 1])

    # Strip backslash+newline from all pieces except the final one.
    cleaned = [piece.rstrip()[:-1] for piece in pieces[:-1]]
    # Strip only the newline/trailing whitespace from the final piece.
    cleaned.append(pieces[-1].rstrip())
    return ' '.join(cleaned)
157 160
class TokenTransformBase:
    """Common base for transformers that locate special syntax via tokens.

    IPython's special syntax must be left alone when it appears inside a
    string or a comment, which regexes cannot reliably detect. Instead the
    code is tokenised as Python and the special syntax is recognised in the
    token stream.

    Because the special syntax is not valid Python, tokenising may go wrong
    after the point where it starts. Each transformer therefore finds and
    rewrites *one* occurrence at a time into regular Python; tokens are then
    regenerated to look for the next occurrence.

    Subclasses implement one class method (``find``) and one regular method
    (``transform``).

    ``priority`` breaks ties when several transformers match at the same
    position; lower numbers win. This is what lets "%magic?" become a help
    call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (line, column) pair with a 1-indexed line number;
        # store the line 0-indexed.
        self.start_line = start[0] - 1
        self.start_col = start[1]

    def sortby(self):
        """Sort key: earliest position first, then highest priority."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Locate one occurrence of the special syntax in the tokens.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        grouped into logical lines so that e.g. the first token of each line
        is easy to inspect.

        Implementations return an instance of their class pointing at the
        start position found, or None if there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Rewrite the occurrence previously located by ``find()``.

        Receives the physical lines as a list of strings and returns a
        similar list containing the transformed lines.
        """
        raise NotImplementedError
208 211
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None when no
        line has a top-level ``=`` immediately followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Both line[assign_ix+1] and line[assign_ix+2] are read below, so
            # the line must hold at least assign_ix + 3 tokens. A token line
            # can be cut short (tokenizing may stop early on incomplete
            # input), so a line ending in "= %" must not be indexed past its
            # end.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix + 1].string == '%') \
                    and (line[assign_ix + 2].type == tokenize.NAME):
                return cls(line[assign_ix + 1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text before the ``%`` is kept as the left-hand side; the magic
        (including any backslash-continued lines) is replaced with a
        ``get_ipython().run_line_magic(name, args)`` call on a single line.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
239 242
240 243
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` token, or None.
        """
        for tokens in tokens_by_line:
            eq_pos = _find_assign_op(tokens)
            if eq_pos is None:
                continue
            if tokens[eq_pos].line.strip().startswith('='):
                # The physical line itself begins with '=': not an assignment.
                continue
            if len(tokens) < eq_pos + 2:
                continue

            # Walk the error tokens that directly follow '=': skip the
            # whitespace ones and stop at the first '!' or anything else.
            for candidate in tokens[eq_pos + 1:]:
                if candidate.type != tokenize.ERRORTOKEN:
                    break
                if candidate.string == '!':
                    return cls(candidate.start)
                if not candidate.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text before the ``!`` is kept as the left-hand side; the command
        (including any backslash-continued lines) is replaced with a
        ``get_ipython().getoutput(cmd)`` call on a single line.
        """
        row, col = self.start_line, self.start_col

        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        prefix = lines[row][:col]
        replacement = prefix + "get_ipython().getoutput({!r})".format(rhs[1:]) + '\n'

        return lines[:row] + [replacement] + lines[last_row + 1:]
279 282
# Escape sequences defining the syntax transformations IPython applies to
# user input. They can NOT simply be changed here: many regular expressions
# and other parts of the code may rely on their hardcoded values. For all
# intents and purposes they constitute the 'IPython syntax' and should be
# treated as fixed.

ESC_SHELL = '!'      # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP = '?'       # Find information about object
ESC_HELP2 = '??'     # Find extra-detailed information about object
ESC_MAGIC = '%'      # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE = ','      # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN = '/'      # Call first argument with rest of line as arguments

# Every one-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
298 301
def _make_help_call(target, esc, next_input=None):
    """Build the source of a pinfo/pinfo2/psearch call for a help request.

    Parameters
    ----------
    target : str
        What help was requested on; passed through as the magic's argument.
    esc : str
        The help escape used. ``'??'`` selects ``pinfo2``; anything else
        selects ``psearch`` when *target* contains ``*`` and ``pinfo``
        otherwise.
    next_input : str, optional
        When given, a ``set_next_input`` call with this text is emitted
        before the help call.

    Returns
    -------
    str
        Python source calling ``get_ipython().run_line_magic(...)``.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    # The magic name and its argument string are simply (method, target);
    # none of the three method names carries a '%' prefix to strip.
    call = 'get_ipython().run_line_magic(%r, %r)' % (method, target)
    if next_input is None:
        return call
    return 'get_ipython().set_next_input(%r);' % (next_input,) + call
314 317
def _tr_help(content):
    """Translate lines escaped with: ?

    A bare ``?`` with nothing after it shows the intro help screen
    (shell.show_usage()); otherwise a help call on *content* is produced.
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
324 327
def _tr_help2(content):
    """Translate lines escaped with: ??

    A bare ``??`` with nothing after it shows the intro help screen
    (shell.show_usage()); otherwise a detailed help call on *content* is
    produced.
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
334 337
def _tr_magic(content):
    """Translate lines escaped with a percent sign: %

    The text up to the first space is the magic name; the remainder is
    passed as its argument string.
    """
    magic_name, _, magic_args = content.partition(' ')
    return 'get_ipython().run_line_magic({!r}, {!r})'.format(magic_name, magic_args)
339 342
def _tr_quote(content):
    """Translate lines escaped with a comma: ,

    The first word is the callable; the remaining text is split on
    whitespace and each piece is passed as a separate double-quoted string.
    """
    func, _, rest = content.partition(' ')
    quoted_args = '", "'.join(rest.split())
    return '{}("{}")'.format(func, quoted_args)
344 347
def _tr_quote2(content):
    """Translate lines escaped with a semicolon: ;

    The first word is the callable; everything after the first space is
    passed as one double-quoted string.
    """
    func, _, rest = content.partition(' ')
    return '{}("{}")'.format(func, rest)
349 352
def _tr_paren(content):
    """Translate lines escaped with a slash: /

    The first word is the callable; the remaining whitespace-separated words
    become its comma-separated arguments.
    """
    func, _, rest = content.partition(' ')
    arg_list = ", ".join(rest.split())
    return '{}({})'.format(func, arg_list)
354 357
# Dispatch table: escape sequence -> callable turning the text that follows
# the escape into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
363 366
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        A line matches when its first token, ignoring INDENT/DEDENT tokens,
        is one of the single-character escapes. Returns an instance
        positioned at that token, or None.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            first = next(
                (tok for tok in tokens if tok.type not in layout_types),
                None,
            )
            if first is None:
                # Empty line, or nothing but INDENT/DEDENT tokens.
                continue
            if first.string in ESCAPE_SINGLES:
                return cls(first.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command (with any backslash-continued lines) is replaced
        by a single line holding the original indentation followed by the
        output of the matching translator in ``tr``. An escape with no
        translator yields just the indentation.
        """
        row, col = self.start_line, self.start_col

        last_row = find_end_of_continued_line(lines, row)
        command = assemble_continued_line(lines, (row, col), last_row)

        # Prefer a two-character escape (!!, ??) over a one-character one.
        width = 2 if command[:2] in ESCAPE_DOUBLES else 1
        escape, content = command[:width], command[width:]

        translator = tr.get(escape)
        call = translator(content) if translator is not None else ''

        indent = lines[row][:col]
        return lines[:row] + [indent + call + '\n'] + lines[last_row + 1:]
406 409
# Help request at the end of a line: group 1 is the target (an optional
# %/%% prefix plus a dotted name that may contain ``*``), group 3 is the
# trailing ``?`` or ``??``.
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+            # Variable name
                              (\.(?!\d)[\w*]+)*       # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
414 417
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the '?' token, with a 1-indexed
        # line number; store the line 0-indexed.
        self.q_line = q_locn[0] - 1
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A line matches when it has more than two tokens and its
        second-to-last token is ``?``. Returns an instance recording the
        start of the line's first non-INDENT/DEDENT token and the position
        of the ``?``, or None.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for tokens in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(tokens) <= 2:
                continue
            question = tokens[-2]
            if question.string != '?':
                continue
            # Find the first token that's not INDENT/DEDENT
            pos = 0
            while tokens[pos].type in layout_types:
                pos += 1
            return cls(tokens[pos].start, question.start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text does not end in a recognisable
        ``name?`` / ``name??`` form.
        """
        first, last = self.start_line, self.q_line
        lines_before = lines[:first]
        lines_after = lines[last + 1:]

        piece = ''.join(lines[first:last + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]

        m = _help_end_re.search(content)
        if m is None:
            raise SyntaxError(content)
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_whole_cell = (not lines_before) and (not lines_after)
        if is_whole_cell and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return lines_before + [indent + call + '\n'] + lines_after
464 467
def make_tokens_by_line(lines:List[str]):
    """Tokenize *lines* and group the tokens into logical lines.

    Tokens belonging to a multiline Python string or bracketed expression
    end up in the same group. Every line except the last should keep its
    line ending ('\\n', '\\r\\n'); pass the result of
    ``text.splitlines(keepends=True)``, for example. A warning is issued when
    several lines are given and the first one has no line ending.

    Returns a list of lists of tokenize.TokenInfo objects. If the input ends
    inside a multiline string or expression, the tokens produced up to that
    point are returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    line_endings = ('\n', '\r', '\r\n', '\x0b', '\x0c')

    grouped = [[]]
    if len(lines) > 1 and not lines[0].endswith(line_endings):
        warnings.warn(
            "`make_tokens_by_line` received a list of lines which do not have "
            "lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), "
            "behavior will be unspecified"
        )

    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            grouped[-1].append(tok)
            ends_logical_line = (
                tok.type == tokenize.NEWLINE
                or (tok.type == tokenize.NL and depth <= 0)
            )
            if ends_logical_line:
                grouped.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers:
                if depth > 0:
                    depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the trailing group if nothing was added to it.
    if not grouped[-1]:
        grouped.pop()

    return grouped
504 507
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s*, grouped by logical line.
    """
    text = s if s.endswith('\n') else s + '\n'
    for token_group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_group:
            print(" ", tokinfo)
514 517
# Upper bound on token-transform passes over a single cell; an arbitrary
# limit that keeps a misbehaving transformer from looping forever.
TRANSFORM_LOOP_LIMIT = 500
517 520
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """

    def __init__(self):
        # Applied first, to tidy up pasted input.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Applied next, on the whole list of lines.
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transformer classes, applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        matches = [cls.find(tokens_by_line) for cls in self.token_transformers]
        candidates = [match for match in matches if match]

        if not candidates:
            # Nothing to transform
            return False, lines

        # Earliest position first; priority breaks ties.
        candidates.sort(key=TokenTransformBase.sortby)
        for transformer in candidates:
            try:
                transformed = transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                continue
            return True, transformed
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until nothing changes.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        return ''.join(self.do_token_transforms(lines))

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if the input is not
            a prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code.
            None unless status is 'incomplete' (and in one 'incomplete' case
            as well).
        """
        # Remember whether the cell's trailing whitespace contains a newline.
        trailing_ws = cell[len(cell.rstrip()):]
        ends_with_newline = '\n' in trailing_ws

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if getattr(transform, 'has_side_effects', False):
                    continue
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            return 'complete', None

        try:
            for transform in self.line_transforms:
                if getattr(transform, 'has_side_effects', False):
                    continue
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}
        layout_types = {tokenize.INDENT, tokenize.DEDENT}

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        closing_types = {tok.type for tok in tokens_by_line[-1]}
        only_closing = closing_types in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER},
        ]
        if only_closing and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Trim trailing NEWLINE/COMMENT/ENDMARKER tokens from the final line.
        final = tokens_by_line[-1]
        while final and final[-1].type in newline_types:
            final.pop()

        if not final:
            return 'incomplete', find_last_indent(lines)

        if final[-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            pos = 0
            while final[pos].type in layout_types:
                pos += 1

            indent = final[pos].start[1]
            return 'incomplete', indent + 4

        if final[0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            # The cell closes an indented block: wait for a trailing newline
            # before treating it as ready.
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # Ready to execute, whether or not there is a blank line at the end.
        return 'complete', None
717 720
718 721
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Only the leading run of spaces and tabs is measured; a line without one
    gives 0.
    """
    tail = lines[-1]
    margin = tail[:len(tail) - len(tail.lstrip(' \t'))]
    return len(margin.replace('\t', ' ' * 4))
General Comments 0
You need to be logged in to leave comments. Login now