Use custom CommandCompiler for input validation
Artur Svistunov
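This change swaps the stdlib codeop.compile_command for a module-level MaybeAsyncCommandCompiler instance, so that the final syntax check in check_complete() can accept input containing top-level await (compiled with ast.PyCF_ALLOW_TOP_LEVEL_AWAIT on Python 3.8+) instead of rejecting it as a syntax error. The snippet below is a minimal illustration of the flag's effect using only the standard library; it is not part of the commit.

    import ast
    import sys

    source = "import asyncio\nawait asyncio.sleep(0)\n"

    # A plain compile() rejects 'await' outside an async function.
    try:
        compile(source, "<input>", "exec")
    except SyntaxError as exc:
        print("plain compile:", exc.msg)

    # With PyCF_ALLOW_TOP_LEVEL_AWAIT (Python 3.8+) the same source compiles;
    # this is the extra flag the custom CommandCompiler in the patch passes through.
    if sys.version_info >= (3, 8):
        code = compile(source, "<input>", "exec", ast.PyCF_ALLOW_TOP_LEVEL_AWAIT)
        print("with PyCF_ALLOW_TOP_LEVEL_AWAIT:", type(code).__name__)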
@@ -1,775 +1,794 @@
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 from codeop import compile_command
13 import ast
14 import sys
15 from codeop import CommandCompiler
14 16 import re
15 17 import tokenize
16 18 from typing import List, Tuple, Optional, Any
17 19 import warnings
18 20
19 21 _indent_re = re.compile(r'^[ \t]+')
20 22
21 23 def leading_empty_lines(lines):
22 24 """Remove leading empty lines
23 25
24 26 If the leading lines are empty or contain only whitespace, they will be
25 27 removed.
26 28 """
27 29 if not lines:
28 30 return lines
29 31 for i, line in enumerate(lines):
30 32 if line and not line.isspace():
31 33 return lines[i:]
32 34 return lines
33 35
34 36 def leading_indent(lines):
35 37 """Remove leading indentation.
36 38
37 39 If the first line starts with spaces or tabs, the same whitespace will be
38 40 removed from each following line in the cell.
39 41 """
40 42 if not lines:
41 43 return lines
42 44 m = _indent_re.match(lines[0])
43 45 if not m:
44 46 return lines
45 47 space = m.group(0)
46 48 n = len(space)
47 49 return [l[n:] if l.startswith(space) else l
48 50 for l in lines]
49 51
50 52 class PromptStripper:
51 53 """Remove matching input prompts from a block of input.
52 54
53 55 Parameters
54 56 ----------
55 57 prompt_re : regular expression
56 58 A regular expression matching any input prompt (including continuation,
57 59 e.g. ``...``)
58 60 initial_re : regular expression, optional
59 61 A regular expression matching only the initial prompt, but not continuation.
60 62 If no initial expression is given, prompt_re will be used everywhere.
61 63 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
62 64 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
63 65
64 66 Notes
65 67 -----
66 68
67 69 If initial_re and prompt_re differ,
68 70 only initial_re will be tested against the first line.
69 71 If any prompt is found on the first two lines,
70 72 prompts will be stripped from the rest of the block.
71 73 """
72 74 def __init__(self, prompt_re, initial_re=None):
73 75 self.prompt_re = prompt_re
74 76 self.initial_re = initial_re or prompt_re
75 77
76 78 def _strip(self, lines):
77 79 return [self.prompt_re.sub('', l, count=1) for l in lines]
78 80
79 81 def __call__(self, lines):
80 82 if not lines:
81 83 return lines
82 84 if self.initial_re.match(lines[0]) or \
83 85 (len(lines) > 1 and self.prompt_re.match(lines[1])):
84 86 return self._strip(lines)
85 87 return lines
86 88
87 89 classic_prompt = PromptStripper(
88 90 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
89 91 initial_re=re.compile(r'^>>>( |$)')
90 92 )
91 93
92 94 ipython_prompt = PromptStripper(
93 95 re.compile(
94 96 r"""
95 97 ^( # Match from the beginning of a line, either:
96 98
97 99 # 1. First-line prompt:
98 100 ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
99 101 In\ # The 'In' of the prompt, with a space
100 102 \[\d+\]: # Command index, as displayed in the prompt
101 103 \ # With a mandatory trailing space
102 104
103 105 | # ... or ...
104 106
105 107 # 2. The three dots of the multiline prompt
106 108 \s* # All leading whitespace characters
107 109 \.{3,}: # The three (or more) dots
108 110 \ ? # With an optional trailing space
109 111
110 112 )
111 113 """,
112 114 re.VERBOSE,
113 115 )
114 116 )
115 117
116 118
117 119 def cell_magic(lines):
118 120 if not lines or not lines[0].startswith('%%'):
119 121 return lines
120 122 if re.match(r'%%\w+\?', lines[0]):
121 123 # This case will be handled by help_end
122 124 return lines
123 125 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
124 126 body = ''.join(lines[1:])
125 127 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
126 128 % (magic_name, first_line, body)]
127 129
128 130
129 131 def _find_assign_op(token_line) -> Optional[int]:
130 132 """Get the index of the first assignment in the line ('=' not inside brackets)
131 133
132 134 Note: We don't try to support multiple special assignments (a = b = %foo)
133 135 """
134 136 paren_level = 0
135 137 for i, ti in enumerate(token_line):
136 138 s = ti.string
137 139 if s == '=' and paren_level == 0:
138 140 return i
139 141 if s in {'(','[','{'}:
140 142 paren_level += 1
141 143 elif s in {')', ']', '}'}:
142 144 if paren_level > 0:
143 145 paren_level -= 1
144 146 return None
145 147
146 148 def find_end_of_continued_line(lines, start_line: int):
147 149 """Find the last line of a line explicitly extended using backslashes.
148 150
149 151 Uses 0-indexed line numbers.
150 152 """
151 153 end_line = start_line
152 154 while lines[end_line].endswith('\\\n'):
153 155 end_line += 1
154 156 if end_line >= len(lines):
155 157 break
156 158 return end_line
157 159
158 160 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
159 161 r"""Assemble a single line from multiple continued line pieces
160 162
161 163 Continued lines are lines ending in ``\``, and the line following the last
162 164 ``\`` in the block.
163 165
164 166 For example, this code continues over multiple lines::
165 167
166 168 if (assign_ix is not None) \
167 169 and (len(line) >= assign_ix + 2) \
168 170 and (line[assign_ix+1].string == '%') \
169 171 and (line[assign_ix+2].type == tokenize.NAME):
170 172
171 173 This statement contains four continued line pieces.
172 174 Assembling these pieces into a single line would give::
173 175
174 176 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
175 177
176 178 This uses 0-indexed line numbers. *start* is (lineno, colno).
177 179
178 180 Used to allow ``%magic`` and ``!system`` commands to be continued over
179 181 multiple lines.
180 182 """
181 183 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
182 184 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
183 185 + [parts[-1].rstrip()]) # Strip newline from last line
184 186
185 187 class TokenTransformBase:
186 188 """Base class for transformations which examine tokens.
187 189
188 190 Special syntax should not be transformed when it occurs inside strings or
189 191 comments. This is hard to reliably avoid with regexes. The solution is to
190 192 tokenise the code as Python, and recognise the special syntax in the tokens.
191 193
192 194 IPython's special syntax is not valid Python syntax, so tokenising may go
193 195 wrong after the special syntax starts. These classes therefore find and
194 196 transform *one* instance of special syntax at a time into regular Python
195 197 syntax. After each transformation, tokens are regenerated to find the next
196 198 piece of special syntax.
197 199
198 200 Subclasses need to implement one class method (find)
199 201 and one regular method (transform).
200 202
201 203 The priority attribute can select which transformation to apply if multiple
202 204 transformers match in the same place. Lower numbers have higher priority.
203 205 This allows "%magic?" to be turned into a help call rather than a magic call.
204 206 """
205 207 # Lower numbers -> higher priority (for matches in the same location)
206 208 priority = 10
207 209
208 210 def sortby(self):
209 211 return self.start_line, self.start_col, self.priority
210 212
211 213 def __init__(self, start):
212 214 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
213 215 self.start_col = start[1]
214 216
215 217 @classmethod
216 218 def find(cls, tokens_by_line):
217 219 """Find one instance of special syntax in the provided tokens.
218 220
219 221 Tokens are grouped into logical lines for convenience,
220 222 so it is easy to e.g. look at the first token of each line.
221 223 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
222 224
223 225 This should return an instance of its class, pointing to the start
224 226 position it has found, or None if it found no match.
225 227 """
226 228 raise NotImplementedError
227 229
228 230 def transform(self, lines: List[str]):
229 231 """Transform one instance of special syntax found by ``find()``
230 232
231 233 Takes a list of strings representing physical lines,
232 234 returns a similar list of transformed lines.
233 235 """
234 236 raise NotImplementedError
235 237
236 238 class MagicAssign(TokenTransformBase):
237 239 """Transformer for assignments from magics (a = %foo)"""
238 240 @classmethod
239 241 def find(cls, tokens_by_line):
240 242 """Find the first magic assignment (a = %foo) in the cell.
241 243 """
242 244 for line in tokens_by_line:
243 245 assign_ix = _find_assign_op(line)
244 246 if (assign_ix is not None) \
245 247 and (len(line) >= assign_ix + 2) \
246 248 and (line[assign_ix+1].string == '%') \
247 249 and (line[assign_ix+2].type == tokenize.NAME):
248 250 return cls(line[assign_ix+1].start)
249 251
250 252 def transform(self, lines: List[str]):
251 253 """Transform a magic assignment found by the ``find()`` classmethod.
252 254 """
253 255 start_line, start_col = self.start_line, self.start_col
254 256 lhs = lines[start_line][:start_col]
255 257 end_line = find_end_of_continued_line(lines, start_line)
256 258 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
257 259 assert rhs.startswith('%'), rhs
258 260 magic_name, _, args = rhs[1:].partition(' ')
259 261
260 262 lines_before = lines[:start_line]
261 263 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
262 264 new_line = lhs + call + '\n'
263 265 lines_after = lines[end_line+1:]
264 266
265 267 return lines_before + [new_line] + lines_after
266 268
267 269
268 270 class SystemAssign(TokenTransformBase):
269 271 """Transformer for assignments from system commands (a = !foo)"""
270 272 @classmethod
271 273 def find(cls, tokens_by_line):
272 274 """Find the first system assignment (a = !foo) in the cell.
273 275 """
274 276 for line in tokens_by_line:
275 277 assign_ix = _find_assign_op(line)
276 278 if (assign_ix is not None) \
277 279 and not line[assign_ix].line.strip().startswith('=') \
278 280 and (len(line) >= assign_ix + 2) \
279 281 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
280 282 ix = assign_ix + 1
281 283
282 284 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
283 285 if line[ix].string == '!':
284 286 return cls(line[ix].start)
285 287 elif not line[ix].string.isspace():
286 288 break
287 289 ix += 1
288 290
289 291 def transform(self, lines: List[str]):
290 292 """Transform a system assignment found by the ``find()`` classmethod.
291 293 """
292 294 start_line, start_col = self.start_line, self.start_col
293 295
294 296 lhs = lines[start_line][:start_col]
295 297 end_line = find_end_of_continued_line(lines, start_line)
296 298 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
297 299 assert rhs.startswith('!'), rhs
298 300 cmd = rhs[1:]
299 301
300 302 lines_before = lines[:start_line]
301 303 call = "get_ipython().getoutput({!r})".format(cmd)
302 304 new_line = lhs + call + '\n'
303 305 lines_after = lines[end_line + 1:]
304 306
305 307 return lines_before + [new_line] + lines_after
306 308
307 309 # The escape sequences that define the syntax transformations IPython will
308 310 # apply to user input. These can NOT be just changed here: many regular
309 311 # expressions and other parts of the code may use their hardcoded values, and
310 312 # for all intents and purposes they constitute the 'IPython syntax', so they
311 313 # should be considered fixed.
312 314
313 315 ESC_SHELL = '!' # Send line to underlying system shell
314 316 ESC_SH_CAP = '!!' # Send line to system shell and capture output
315 317 ESC_HELP = '?' # Find information about object
316 318 ESC_HELP2 = '??' # Find extra-detailed information about object
317 319 ESC_MAGIC = '%' # Call magic function
318 320 ESC_MAGIC2 = '%%' # Call cell-magic function
319 321 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
320 322 ESC_QUOTE2 = ';' # Quote all args as a single string, call
321 323 ESC_PAREN = '/' # Call first argument with rest of line as arguments
322 324
323 325 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
324 326 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
325 327
326 328 def _make_help_call(target, esc, next_input=None):
327 329 """Prepares a pinfo(2)/psearch call from a target name and the escape
328 330 (i.e. ? or ??)"""
329 331 method = 'pinfo2' if esc == '??' \
330 332 else 'psearch' if '*' in target \
331 333 else 'pinfo'
332 334 arg = " ".join([method, target])
333 335 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
334 336 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
335 337 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
336 338 if next_input is None:
337 339 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
338 340 else:
339 341 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
340 342 (next_input, t_magic_name, t_magic_arg_s)
341 343
342 344 def _tr_help(content):
343 345 """Translate lines escaped with: ?
344 346
345 347 A naked help line should fire the intro help screen (shell.show_usage())
346 348 """
347 349 if not content:
348 350 return 'get_ipython().show_usage()'
349 351
350 352 return _make_help_call(content, '?')
351 353
352 354 def _tr_help2(content):
353 355 """Translate lines escaped with: ??
354 356
355 357 A naked help line should fire the intro help screen (shell.show_usage())
356 358 """
357 359 if not content:
358 360 return 'get_ipython().show_usage()'
359 361
360 362 return _make_help_call(content, '??')
361 363
362 364 def _tr_magic(content):
363 365 "Translate lines escaped with a percent sign: %"
364 366 name, _, args = content.partition(' ')
365 367 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
366 368
367 369 def _tr_quote(content):
368 370 "Translate lines escaped with a comma: ,"
369 371 name, _, args = content.partition(' ')
370 372 return '%s("%s")' % (name, '", "'.join(args.split()) )
371 373
372 374 def _tr_quote2(content):
373 375 "Translate lines escaped with a semicolon: ;"
374 376 name, _, args = content.partition(' ')
375 377 return '%s("%s")' % (name, args)
376 378
377 379 def _tr_paren(content):
378 380 "Translate lines escaped with a slash: /"
379 381 name, _, args = content.partition(' ')
380 382 return '%s(%s)' % (name, ", ".join(args.split()))
381 383
382 384 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
383 385 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
384 386 ESC_HELP : _tr_help,
385 387 ESC_HELP2 : _tr_help2,
386 388 ESC_MAGIC : _tr_magic,
387 389 ESC_QUOTE : _tr_quote,
388 390 ESC_QUOTE2 : _tr_quote2,
389 391 ESC_PAREN : _tr_paren }
390 392
391 393 class EscapedCommand(TokenTransformBase):
392 394 """Transformer for escaped commands like %foo, !foo, or /foo"""
393 395 @classmethod
394 396 def find(cls, tokens_by_line):
395 397 """Find the first escaped command (%foo, !foo, etc.) in the cell.
396 398 """
397 399 for line in tokens_by_line:
398 400 if not line:
399 401 continue
400 402 ix = 0
401 403 ll = len(line)
402 404 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
403 405 ix += 1
404 406 if ix >= ll:
405 407 continue
406 408 if line[ix].string in ESCAPE_SINGLES:
407 409 return cls(line[ix].start)
408 410
409 411 def transform(self, lines):
410 412 """Transform an escaped line found by the ``find()`` classmethod.
411 413 """
412 414 start_line, start_col = self.start_line, self.start_col
413 415
414 416 indent = lines[start_line][:start_col]
415 417 end_line = find_end_of_continued_line(lines, start_line)
416 418 line = assemble_continued_line(lines, (start_line, start_col), end_line)
417 419
418 420 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
419 421 escape, content = line[:2], line[2:]
420 422 else:
421 423 escape, content = line[:1], line[1:]
422 424
423 425 if escape in tr:
424 426 call = tr[escape](content)
425 427 else:
426 428 call = ''
427 429
428 430 lines_before = lines[:start_line]
429 431 new_line = indent + call + '\n'
430 432 lines_after = lines[end_line + 1:]
431 433
432 434 return lines_before + [new_line] + lines_after
433 435
434 436 _help_end_re = re.compile(r"""(%{0,2}
435 437 (?!\d)[\w*]+ # Variable name
436 438 (\.(?!\d)[\w*]+)* # .etc.etc
437 439 )
438 440 (\?\??)$ # ? or ??
439 441 """,
440 442 re.VERBOSE)
441 443
442 444 class HelpEnd(TokenTransformBase):
443 445 """Transformer for help syntax: obj? and obj??"""
444 446 # This needs to be higher priority (lower number) than EscapedCommand so
445 447 # that inspecting magics (%foo?) works.
446 448 priority = 5
447 449
448 450 def __init__(self, start, q_locn):
449 451 super().__init__(start)
450 452 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
451 453 self.q_col = q_locn[1]
452 454
453 455 @classmethod
454 456 def find(cls, tokens_by_line):
455 457 """Find the first help command (foo?) in the cell.
456 458 """
457 459 for line in tokens_by_line:
458 460 # Last token is NEWLINE; look at last but one
459 461 if len(line) > 2 and line[-2].string == '?':
460 462 # Find the first token that's not INDENT/DEDENT
461 463 ix = 0
462 464 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
463 465 ix += 1
464 466 return cls(line[ix].start, line[-2].start)
465 467
466 468 def transform(self, lines):
467 469 """Transform a help command found by the ``find()`` classmethod.
468 470 """
469 471 piece = ''.join(lines[self.start_line:self.q_line+1])
470 472 indent, content = piece[:self.start_col], piece[self.start_col:]
471 473 lines_before = lines[:self.start_line]
472 474 lines_after = lines[self.q_line + 1:]
473 475
474 476 m = _help_end_re.search(content)
475 477 if not m:
476 478 raise SyntaxError(content)
477 479 assert m is not None, content
478 480 target = m.group(1)
479 481 esc = m.group(3)
480 482
481 483 # If we're mid-command, put it back on the next prompt for the user.
482 484 next_input = None
483 485 if (not lines_before) and (not lines_after) \
484 486 and content.strip() != m.group(0):
485 487 next_input = content.rstrip('?\n')
486 488
487 489 call = _make_help_call(target, esc, next_input=next_input)
488 490 new_line = indent + call + '\n'
489 491
490 492 return lines_before + [new_line] + lines_after
491 493
492 494 def make_tokens_by_line(lines:List[str]):
493 495 """Tokenize a series of lines and group tokens by line.
494 496
495 497 The tokens for a multiline Python string or expression are grouped as one
496 498 line. All lines except the last should keep their line ending ('\\n',
497 499 '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`,
498 500 for example, when passing a block of text to this function.
499 501
500 502 """
501 503 # NL tokens are used inside multiline expressions, but also after blank
502 504 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
503 505 # We want to group the former case together but split the latter, so we
504 506 # track parentheses level, similar to the internals of tokenize.
505 507
506 508 # reexported from token on 3.7+
507 509 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
508 510 tokens_by_line:List[List[Any]] = [[]]
509 511 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
510 512 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
511 513 parenlev = 0
512 514 try:
513 515 for token in tokenize.generate_tokens(iter(lines).__next__):
514 516 tokens_by_line[-1].append(token)
515 517 if (token.type == NEWLINE) \
516 518 or ((token.type == NL) and (parenlev <= 0)):
517 519 tokens_by_line.append([])
518 520 elif token.string in {'(', '[', '{'}:
519 521 parenlev += 1
520 522 elif token.string in {')', ']', '}'}:
521 523 if parenlev > 0:
522 524 parenlev -= 1
523 525 except tokenize.TokenError:
524 526 # Input ended in a multiline string or expression. That's OK for us.
525 527 pass
526 528
527 529
528 530 if not tokens_by_line[-1]:
529 531 tokens_by_line.pop()
530 532
531 533
532 534 return tokens_by_line
533 535
534 536
535 537 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
536 538 """Check if the depth of brackets in the list of tokens drops below 0"""
537 539 parenlev = 0
538 540 for token in tokens:
539 541 if token.string in {"(", "[", "{"}:
540 542 parenlev += 1
541 543 elif token.string in {")", "]", "}"}:
542 544 parenlev -= 1
543 545 if parenlev < 0:
544 546 return True
545 547 return False
546 548
547 549
548 550 def show_linewise_tokens(s: str):
549 551 """For investigation and debugging"""
550 552 if not s.endswith('\n'):
551 553 s += '\n'
552 554 lines = s.splitlines(keepends=True)
553 555 for line in make_tokens_by_line(lines):
554 556 print("Line -------")
555 557 for tokinfo in line:
556 558 print(" ", tokinfo)
557 559
558 560 # Arbitrary limit to prevent getting stuck in infinite loops
559 561 TRANSFORM_LOOP_LIMIT = 500
560 562
561 563 class TransformerManager:
562 564 """Applies various transformations to a cell or code block.
563 565
564 566 The key methods for external use are ``transform_cell()``
565 567 and ``check_complete()``.
566 568 """
567 569 def __init__(self):
568 570 self.cleanup_transforms = [
569 571 leading_empty_lines,
570 572 leading_indent,
571 573 classic_prompt,
572 574 ipython_prompt,
573 575 ]
574 576 self.line_transforms = [
575 577 cell_magic,
576 578 ]
577 579 self.token_transformers = [
578 580 MagicAssign,
579 581 SystemAssign,
580 582 EscapedCommand,
581 583 HelpEnd,
582 584 ]
583 585
584 586 def do_one_token_transform(self, lines):
585 587 """Find and run the transform earliest in the code.
586 588
587 589 Returns (changed, lines).
588 590
589 591 This method is called repeatedly until changed is False, indicating
590 592 that all available transformations are complete.
591 593
592 594 The tokens following IPython special syntax might not be valid, so
593 595 the transformed code is retokenised every time to identify the next
594 596 piece of special syntax. Hopefully long code cells are mostly valid
595 597 Python, not using lots of IPython special syntax, so this shouldn't be
596 598 a performance issue.
597 599 """
598 600 tokens_by_line = make_tokens_by_line(lines)
599 601 candidates = []
600 602 for transformer_cls in self.token_transformers:
601 603 transformer = transformer_cls.find(tokens_by_line)
602 604 if transformer:
603 605 candidates.append(transformer)
604 606
605 607 if not candidates:
606 608 # Nothing to transform
607 609 return False, lines
608 610 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
609 611 for transformer in ordered_transformers:
610 612 try:
611 613 return True, transformer.transform(lines)
612 614 except SyntaxError:
613 615 pass
614 616 return False, lines
615 617
616 618 def do_token_transforms(self, lines):
617 619 for _ in range(TRANSFORM_LOOP_LIMIT):
618 620 changed, lines = self.do_one_token_transform(lines)
619 621 if not changed:
620 622 return lines
621 623
622 624 raise RuntimeError("Input transformation still changing after "
623 625 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
624 626
625 627 def transform_cell(self, cell: str) -> str:
626 628 """Transforms a cell of input code"""
627 629 if not cell.endswith('\n'):
628 630 cell += '\n' # Ensure the cell has a trailing newline
629 631 lines = cell.splitlines(keepends=True)
630 632 for transform in self.cleanup_transforms + self.line_transforms:
631 633 lines = transform(lines)
632 634
633 635 lines = self.do_token_transforms(lines)
634 636 return ''.join(lines)
635 637
636 638 def check_complete(self, cell: str):
637 639 """Return whether a block of code is ready to execute, or should be continued
638 640
639 641 Parameters
640 642 ----------
641 643 cell : str
642 644 Python input code, which can be multiline.
643 645
644 646 Returns
645 647 -------
646 648 status : str
647 649 One of 'complete', 'incomplete', or 'invalid' if source is not a
648 650 prefix of valid code.
649 651 indent_spaces : int or None
650 652 The number of spaces by which to indent the next line of code. If
651 653 status is not 'incomplete', this is None.
652 654 """
653 655 # Remember whether the cell ends in a newline.
654 656 ends_with_newline = False
655 657 for character in reversed(cell):
656 658 if character == '\n':
657 659 ends_with_newline = True
658 660 break
659 661 elif character.strip():
660 662 break
661 663 else:
662 664 continue
663 665
664 666 if not ends_with_newline:
665 667 # Append a newline for consistent tokenization
666 668 # See https://bugs.python.org/issue33899
667 669 cell += '\n'
668 670
669 671 lines = cell.splitlines(keepends=True)
670 672
671 673 if not lines:
672 674 return 'complete', None
673 675
674 676 if lines[-1].endswith('\\'):
675 677 # Explicit backslash continuation
676 678 return 'incomplete', find_last_indent(lines)
677 679
678 680 try:
679 681 for transform in self.cleanup_transforms:
680 682 if not getattr(transform, 'has_side_effects', False):
681 683 lines = transform(lines)
682 684 except SyntaxError:
683 685 return 'invalid', None
684 686
685 687 if lines[0].startswith('%%'):
686 688 # Special case for cell magics - completion marked by blank line
687 689 if lines[-1].strip():
688 690 return 'incomplete', find_last_indent(lines)
689 691 else:
690 692 return 'complete', None
691 693
692 694 try:
693 695 for transform in self.line_transforms:
694 696 if not getattr(transform, 'has_side_effects', False):
695 697 lines = transform(lines)
696 698 lines = self.do_token_transforms(lines)
697 699 except SyntaxError:
698 700 return 'invalid', None
699 701
700 702 tokens_by_line = make_tokens_by_line(lines)
701 703
702 704 # Bail if we got one line and there are more closing parentheses than
703 705 # the opening ones
704 706 if (
705 707 len(lines) == 1
706 708 and tokens_by_line
707 709 and has_sunken_brackets(tokens_by_line[0])
708 710 ):
709 711 return "invalid", None
710 712
711 713 if not tokens_by_line:
712 714 return 'incomplete', find_last_indent(lines)
713 715
714 716 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
715 717 # We're in a multiline string or expression
716 718 return 'incomplete', find_last_indent(lines)
717 719
718 720 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
719 721
720 722 # Pop the last line which only contains DEDENTs and ENDMARKER
721 723 last_token_line = None
722 724 if {t.type for t in tokens_by_line[-1]} in [
723 725 {tokenize.DEDENT, tokenize.ENDMARKER},
724 726 {tokenize.ENDMARKER}
725 727 ] and len(tokens_by_line) > 1:
726 728 last_token_line = tokens_by_line.pop()
727 729
728 730 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
729 731 tokens_by_line[-1].pop()
730 732
731 733 if not tokens_by_line[-1]:
732 734 return 'incomplete', find_last_indent(lines)
733 735
734 736 if tokens_by_line[-1][-1].string == ':':
735 737 # The last line starts a block (e.g. 'if foo:')
736 738 ix = 0
737 739 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
738 740 ix += 1
739 741
740 742 indent = tokens_by_line[-1][ix].start[1]
741 743 return 'incomplete', indent + 4
742 744
743 745 if tokens_by_line[-1][0].line.endswith('\\'):
744 746 return 'incomplete', None
745 747
746 748 # At this point, our checks think the code is complete (or invalid).
747 749 # We'll use the module-level compile_command to check this with the real parser
748 750 try:
749 751 with warnings.catch_warnings():
750 752 warnings.simplefilter('error', SyntaxWarning)
751 753 res = compile_command(''.join(lines), symbol='exec')
752 754 except (SyntaxError, OverflowError, ValueError, TypeError,
753 755 MemoryError, SyntaxWarning):
754 756 return 'invalid', None
755 757 else:
756 758 if res is None:
757 759 return 'incomplete', find_last_indent(lines)
758 760
759 761 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
760 762 if ends_with_newline:
761 763 return 'complete', None
762 764 return 'incomplete', find_last_indent(lines)
763 765
764 766 # If there's a blank line at the end, assume we're ready to execute
765 767 if not lines[-1].strip():
766 768 return 'complete', None
767 769
768 770 return 'complete', None
769 771
770 772
771 773 def find_last_indent(lines):
772 774 m = _indent_re.match(lines[-1])
773 775 if not m:
774 776 return 0
775 777 return len(m.group(0).replace('\t', ' '*4))
778
779
780 class MaybeAsyncCommandCompiler(CommandCompiler):
781 def __init__(self, extra_flags=0):
782 self.compiler = self._compiler
783 self.extra_flags = extra_flags
784
785 def _compiler(self, source, filename, symbol, flags, feature):
786 flags |= self.extra_flags
787 return compile(source, filename, symbol, flags, feature)
788
789 if (sys.version_info.major, sys.version_info.minor) >= (3, 8):
790 _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
791 else:
792 _extra_flags = ast.PyCF_ONLY_AST
793
794 compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
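For reference, a short usage sketch of the public entry points implemented above. The import path and the commented results follow upstream IPython's layout and documented behaviour; both are assumptions here and may differ in this repository.

    from IPython.core.inputtransformer2 import TransformerManager  # assumed upstream import path

    tm = TransformerManager()

    # transform_cell() rewrites IPython special syntax into plain Python source.
    print(tm.transform_cell("%time x = 1\n"))
    # expected (upstream): get_ipython().run_line_magic('time', 'x = 1')

    # check_complete() reports whether a block is ready to execute; the final
    # syntax check goes through the module-level compile_command defined above.
    print(tm.check_complete("if True:\n"))  # expected: ('incomplete', 4)
    print(tm.check_complete("x = (1\n"))    # expected: ('incomplete', 0)
    print(tm.check_complete("x = 1\n"))     # expected: ('complete', None)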