Fix `compile()` invocation
Artur Svistunov
@@ -1,794 +1,798 @@
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 import ast
14 14 import sys
15 from codeop import CommandCompiler
15 from codeop import CommandCompiler, Compile
16 16 import re
17 17 import tokenize
18 18 from typing import List, Tuple, Optional, Any
19 19 import warnings
20 20
21 21 _indent_re = re.compile(r'^[ \t]+')
22 22
23 23 def leading_empty_lines(lines):
24 24 """Remove leading empty lines
25 25
26 26 If the leading lines are empty or contain only whitespace, they will be
27 27 removed.
28 28 """
29 29 if not lines:
30 30 return lines
31 31 for i, line in enumerate(lines):
32 32 if line and not line.isspace():
33 33 return lines[i:]
34 34 return lines
35 35
36 36 def leading_indent(lines):
37 37 """Remove leading indentation.
38 38
39 39 If the first line starts with spaces or tabs, the same whitespace will be
40 40 removed from each following line in the cell.
41 41 """
42 42 if not lines:
43 43 return lines
44 44 m = _indent_re.match(lines[0])
45 45 if not m:
46 46 return lines
47 47 space = m.group(0)
48 48 n = len(space)
49 49 return [l[n:] if l.startswith(space) else l
50 50 for l in lines]
51 51
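A minimal sketch of what these two cleanup helpers do to a pasted, indented cell (illustrative values only):

    leading_empty_lines(["\n", "  \n", "x = 1\n"])
    # -> ["x = 1\n"]
    leading_indent(["    if x:\n", "        y\n"])
    # -> ["if x:\n", "    y\n"]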
52 52 class PromptStripper:
53 53 """Remove matching input prompts from a block of input.
54 54
55 55 Parameters
56 56 ----------
57 57 prompt_re : regular expression
58 58 A regular expression matching any input prompt (including continuation,
59 59 e.g. ``...``)
60 60 initial_re : regular expression, optional
61 61 A regular expression matching only the initial prompt, but not continuation.
62 62 If no initial expression is given, prompt_re will be used everywhere.
63 63 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
64 64 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
65 65
66 66 Notes
67 67 -----
68 68
69 69 If initial_re and prompt_re differ,
70 70 only initial_re will be tested against the first line.
71 71 If any prompt is found on the first two lines,
72 72 prompts will be stripped from the rest of the block.
73 73 """
74 74 def __init__(self, prompt_re, initial_re=None):
75 75 self.prompt_re = prompt_re
76 76 self.initial_re = initial_re or prompt_re
77 77
78 78 def _strip(self, lines):
79 79 return [self.prompt_re.sub('', l, count=1) for l in lines]
80 80
81 81 def __call__(self, lines):
82 82 if not lines:
83 83 return lines
84 84 if self.initial_re.match(lines[0]) or \
85 85 (len(lines) > 1 and self.prompt_re.match(lines[1])):
86 86 return self._strip(lines)
87 87 return lines
88 88
89 89 classic_prompt = PromptStripper(
90 90 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
91 91 initial_re=re.compile(r'^>>>( |$)')
92 92 )
93 93
94 94 ipython_prompt = PromptStripper(
95 95 re.compile(
96 96 r"""
97 97 ^( # Match from the beginning of a line, either:
98 98
99 99 # 1. First-line prompt:
100 100 ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
101 101 In\ # The 'In' of the prompt, with a space
102 102 \[\d+\]: # Command index, as displayed in the prompt
103 103 \ # With a mandatory trailing space
104 104
105 105 | # ... or ...
106 106
107 107 # 2. The three dots of the multiline prompt
108 108 \s* # All leading whitespace characters
109 109 \.{3,}: # The three (or more) dots
110 110 \ ? # With an optional trailing space
111 111
112 112 )
113 113 """,
114 114 re.VERBOSE,
115 115 )
116 116 )
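A short illustrative sketch of both strippers; prompts are removed line by line once the first (or second) line is seen to start with one:

    classic_prompt([">>> a = 1\n", "... b = 2\n"])
    # -> ["a = 1\n", "b = 2\n"]
    ipython_prompt(["In [1]: a = 1\n", "   ...: b = 2\n"])
    # -> ["a = 1\n", "b = 2\n"]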
117 117
118 118
119 119 def cell_magic(lines):
120 120 if not lines or not lines[0].startswith('%%'):
121 121 return lines
122 122 if re.match(r'%%\w+\?', lines[0]):
123 123 # This case will be handled by help_end
124 124 return lines
125 125 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
126 126 body = ''.join(lines[1:])
127 127 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
128 128 % (magic_name, first_line, body)]
129 129
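A rough sketch of the rewrite ``cell_magic`` performs (the quoting comes from ``%r``):

    cell_magic(["%%bash\n", "echo hi\n"])
    # -> ["get_ipython().run_cell_magic('bash', '', 'echo hi\\n')\n"]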
130 130
131 131 def _find_assign_op(token_line) -> Optional[int]:
132 132 """Get the index of the first assignment in the line ('=' not inside brackets)
133 133
134 134 Note: We don't try to support multiple special assignments (a = b = %foo)
135 135 """
136 136 paren_level = 0
137 137 for i, ti in enumerate(token_line):
138 138 s = ti.string
139 139 if s == '=' and paren_level == 0:
140 140 return i
141 141 if s in {'(','[','{'}:
142 142 paren_level += 1
143 143 elif s in {')', ']', '}'}:
144 144 if paren_level > 0:
145 145 paren_level -= 1
146 146 return None
147 147
148 148 def find_end_of_continued_line(lines, start_line: int):
149 149 """Find the last line of a line explicitly extended using backslashes.
150 150
151 151 Uses 0-indexed line numbers.
152 152 """
153 153 end_line = start_line
154 154 while lines[end_line].endswith('\\\n'):
155 155 end_line += 1
156 156 if end_line >= len(lines):
157 157 break
158 158 return end_line
159 159
160 160 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
161 161 r"""Assemble a single line from multiple continued line pieces
162 162
163 163 Continued lines are lines ending in ``\``, and the line following the last
164 164 ``\`` in the block.
165 165
166 166 For example, this code continues over multiple lines::
167 167
168 168 if (assign_ix is not None) \
169 169 and (len(line) >= assign_ix + 2) \
170 170 and (line[assign_ix+1].string == '%') \
171 171 and (line[assign_ix+2].type == tokenize.NAME):
172 172
173 173 This statement contains four continued line pieces.
174 174 Assembling these pieces into a single line would give::
175 175
176 176 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
177 177
178 178 This uses 0-indexed line numbers. *start* is (lineno, colno).
179 179
180 180 Used to allow ``%magic`` and ``!system`` commands to be continued over
181 181 multiple lines.
182 182 """
183 183 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
184 184 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
185 185 + [parts[-1].rstrip()]) # Strip newline from last line
186 186
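For example (a sketch; each backslash/newline pair is replaced by a single space):

    find_end_of_continued_line(["!echo one\\\n", "two\n"], 0)
    # -> 1
    assemble_continued_line(["!echo one\\\n", "two\n"], (0, 0), 1)
    # -> '!echo one two'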
187 187 class TokenTransformBase:
188 188 """Base class for transformations which examine tokens.
189 189
190 190 Special syntax should not be transformed when it occurs inside strings or
191 191 comments. This is hard to reliably avoid with regexes. The solution is to
192 192 tokenise the code as Python, and recognise the special syntax in the tokens.
193 193
194 194 IPython's special syntax is not valid Python syntax, so tokenising may go
195 195 wrong after the special syntax starts. These classes therefore find and
196 196 transform *one* instance of special syntax at a time into regular Python
197 197 syntax. After each transformation, tokens are regenerated to find the next
198 198 piece of special syntax.
199 199
200 200 Subclasses need to implement one class method (find)
201 201 and one regular method (transform).
202 202
203 203 The priority attribute can select which transformation to apply if multiple
204 204 transformers match in the same place. Lower numbers have higher priority.
205 205 This allows "%magic?" to be turned into a help call rather than a magic call.
206 206 """
207 207 # Lower numbers -> higher priority (for matches in the same location)
208 208 priority = 10
209 209
210 210 def sortby(self):
211 211 return self.start_line, self.start_col, self.priority
212 212
213 213 def __init__(self, start):
214 214 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
215 215 self.start_col = start[1]
216 216
217 217 @classmethod
218 218 def find(cls, tokens_by_line):
219 219 """Find one instance of special syntax in the provided tokens.
220 220
221 221 Tokens are grouped into logical lines for convenience,
222 222 so it is easy to e.g. look at the first token of each line.
223 223 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
224 224
225 225 This should return an instance of its class, pointing to the start
226 226 position it has found, or None if it found no match.
227 227 """
228 228 raise NotImplementedError
229 229
230 230 def transform(self, lines: List[str]):
231 231 """Transform one instance of special syntax found by ``find()``
232 232
233 233 Takes a list of strings representing physical lines,
234 234 returns a similar list of transformed lines.
235 235 """
236 236 raise NotImplementedError
237 237
238 238 class MagicAssign(TokenTransformBase):
239 239 """Transformer for assignments from magics (a = %foo)"""
240 240 @classmethod
241 241 def find(cls, tokens_by_line):
242 242 """Find the first magic assignment (a = %foo) in the cell.
243 243 """
244 244 for line in tokens_by_line:
245 245 assign_ix = _find_assign_op(line)
246 246 if (assign_ix is not None) \
247 247 and (len(line) >= assign_ix + 2) \
248 248 and (line[assign_ix+1].string == '%') \
249 249 and (line[assign_ix+2].type == tokenize.NAME):
250 250 return cls(line[assign_ix+1].start)
251 251
252 252 def transform(self, lines: List[str]):
253 253 """Transform a magic assignment found by the ``find()`` classmethod.
254 254 """
255 255 start_line, start_col = self.start_line, self.start_col
256 256 lhs = lines[start_line][:start_col]
257 257 end_line = find_end_of_continued_line(lines, start_line)
258 258 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
259 259 assert rhs.startswith('%'), rhs
260 260 magic_name, _, args = rhs[1:].partition(' ')
261 261
262 262 lines_before = lines[:start_line]
263 263 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
264 264 new_line = lhs + call + '\n'
265 265 lines_after = lines[end_line+1:]
266 266
267 267 return lines_before + [new_line] + lines_after
268 268
269 269
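A sketch of the find/transform round trip for a magic assignment (``make_tokens_by_line`` is defined further down in this module):

    src = ["a = %ls -l\n"]
    MagicAssign.find(make_tokens_by_line(src)).transform(src)
    # -> ["a = get_ipython().run_line_magic('ls', '-l')\n"]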
270 270 class SystemAssign(TokenTransformBase):
271 271 """Transformer for assignments from system commands (a = !foo)"""
272 272 @classmethod
273 273 def find(cls, tokens_by_line):
274 274 """Find the first system assignment (a = !foo) in the cell.
275 275 """
276 276 for line in tokens_by_line:
277 277 assign_ix = _find_assign_op(line)
278 278 if (assign_ix is not None) \
279 279 and not line[assign_ix].line.strip().startswith('=') \
280 280 and (len(line) >= assign_ix + 2) \
281 281 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
282 282 ix = assign_ix + 1
283 283
284 284 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
285 285 if line[ix].string == '!':
286 286 return cls(line[ix].start)
287 287 elif not line[ix].string.isspace():
288 288 break
289 289 ix += 1
290 290
291 291 def transform(self, lines: List[str]):
292 292 """Transform a system assignment found by the ``find()`` classmethod.
293 293 """
294 294 start_line, start_col = self.start_line, self.start_col
295 295
296 296 lhs = lines[start_line][:start_col]
297 297 end_line = find_end_of_continued_line(lines, start_line)
298 298 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
299 299 assert rhs.startswith('!'), rhs
300 300 cmd = rhs[1:]
301 301
302 302 lines_before = lines[:start_line]
303 303 call = "get_ipython().getoutput({!r})".format(cmd)
304 304 new_line = lhs + call + '\n'
305 305 lines_after = lines[end_line + 1:]
306 306
307 307 return lines_before + [new_line] + lines_after
308 308
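And the corresponding sketch for a system-command assignment:

    src = ["files = !ls\n"]
    SystemAssign.find(make_tokens_by_line(src)).transform(src)
    # -> ["files = get_ipython().getoutput('ls')\n"]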
309 309 # The escape sequences that define the syntax transformations IPython will
310 310 # apply to user input. These can NOT be just changed here: many regular
311 311 # expressions and other parts of the code may use their hardcoded values, and
312 312 # for all intents and purposes they constitute the 'IPython syntax', so they
313 313 # should be considered fixed.
314 314
315 315 ESC_SHELL = '!' # Send line to underlying system shell
316 316 ESC_SH_CAP = '!!' # Send line to system shell and capture output
317 317 ESC_HELP = '?' # Find information about object
318 318 ESC_HELP2 = '??' # Find extra-detailed information about object
319 319 ESC_MAGIC = '%' # Call magic function
320 320 ESC_MAGIC2 = '%%' # Call cell-magic function
321 321 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
322 322 ESC_QUOTE2 = ';' # Quote all args as a single string, call
323 323 ESC_PAREN = '/' # Call first argument with rest of line as arguments
324 324
325 325 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
326 326 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
327 327
328 328 def _make_help_call(target, esc, next_input=None):
329 329 """Prepares a pinfo(2)/psearch call from a target name and the escape
330 330 (i.e. ? or ??)"""
331 331 method = 'pinfo2' if esc == '??' \
332 332 else 'psearch' if '*' in target \
333 333 else 'pinfo'
334 334 arg = " ".join([method, target])
335 335 # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
336 336 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
337 337 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
338 338 if next_input is None:
339 339 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
340 340 else:
341 341 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
342 342 (next_input, t_magic_name, t_magic_arg_s)
343 343
344 344 def _tr_help(content):
345 345 """Translate lines escaped with: ?
346 346
347 347 A naked help line should fire the intro help screen (shell.show_usage())
348 348 """
349 349 if not content:
350 350 return 'get_ipython().show_usage()'
351 351
352 352 return _make_help_call(content, '?')
353 353
354 354 def _tr_help2(content):
355 355 """Translate lines escaped with: ??
356 356
357 357 A naked help line should fire the intro help screen (shell.show_usage())
358 358 """
359 359 if not content:
360 360 return 'get_ipython().show_usage()'
361 361
362 362 return _make_help_call(content, '??')
363 363
364 364 def _tr_magic(content):
365 365 "Translate lines escaped with a percent sign: %"
366 366 name, _, args = content.partition(' ')
367 367 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
368 368
369 369 def _tr_quote(content):
370 370 "Translate lines escaped with a comma: ,"
371 371 name, _, args = content.partition(' ')
372 372 return '%s("%s")' % (name, '", "'.join(args.split()) )
373 373
374 374 def _tr_quote2(content):
375 375 "Translate lines escaped with a semicolon: ;"
376 376 name, _, args = content.partition(' ')
377 377 return '%s("%s")' % (name, args)
378 378
379 379 def _tr_paren(content):
380 380 "Translate lines escaped with a slash: /"
381 381 name, _, args = content.partition(' ')
382 382 return '%s(%s)' % (name, ", ".join(args.split()))
383 383
384 384 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
385 385 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
386 386 ESC_HELP : _tr_help,
387 387 ESC_HELP2 : _tr_help2,
388 388 ESC_MAGIC : _tr_magic,
389 389 ESC_QUOTE : _tr_quote,
390 390 ESC_QUOTE2 : _tr_quote2,
391 391 ESC_PAREN : _tr_paren }
392 392
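A few illustrative translations (the escape character itself has already been stripped from ``content`` by the time these are called):

    tr['!']('ls -l')       # -> "get_ipython().system('ls -l')"
    _tr_magic('time x')    # -> "get_ipython().run_line_magic('time', 'x')"
    _tr_quote('f 1 2 3')   # -> 'f("1", "2", "3")'
    _tr_paren('f a b')     # -> 'f(a, b)'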
393 393 class EscapedCommand(TokenTransformBase):
394 394 """Transformer for escaped commands like %foo, !foo, or /foo"""
395 395 @classmethod
396 396 def find(cls, tokens_by_line):
397 397 """Find the first escaped command (%foo, !foo, etc.) in the cell.
398 398 """
399 399 for line in tokens_by_line:
400 400 if not line:
401 401 continue
402 402 ix = 0
403 403 ll = len(line)
404 404 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
405 405 ix += 1
406 406 if ix >= ll:
407 407 continue
408 408 if line[ix].string in ESCAPE_SINGLES:
409 409 return cls(line[ix].start)
410 410
411 411 def transform(self, lines):
412 412 """Transform an escaped line found by the ``find()`` classmethod.
413 413 """
414 414 start_line, start_col = self.start_line, self.start_col
415 415
416 416 indent = lines[start_line][:start_col]
417 417 end_line = find_end_of_continued_line(lines, start_line)
418 418 line = assemble_continued_line(lines, (start_line, start_col), end_line)
419 419
420 420 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
421 421 escape, content = line[:2], line[2:]
422 422 else:
423 423 escape, content = line[:1], line[1:]
424 424
425 425 if escape in tr:
426 426 call = tr[escape](content)
427 427 else:
428 428 call = ''
429 429
430 430 lines_before = lines[:start_line]
431 431 new_line = indent + call + '\n'
432 432 lines_after = lines[end_line + 1:]
433 433
434 434 return lines_before + [new_line] + lines_after
435 435
436 436 _help_end_re = re.compile(r"""(%{0,2}
437 437 (?!\d)[\w*]+ # Variable name
438 438 (\.(?!\d)[\w*]+)* # .etc.etc
439 439 )
440 440 (\?\??)$ # ? or ??
441 441 """,
442 442 re.VERBOSE)
443 443
444 444 class HelpEnd(TokenTransformBase):
445 445 """Transformer for help syntax: obj? and obj??"""
446 446 # This needs to be higher priority (lower number) than EscapedCommand so
447 447 # that inspecting magics (%foo?) works.
448 448 priority = 5
449 449
450 450 def __init__(self, start, q_locn):
451 451 super().__init__(start)
452 452 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
453 453 self.q_col = q_locn[1]
454 454
455 455 @classmethod
456 456 def find(cls, tokens_by_line):
457 457 """Find the first help command (foo?) in the cell.
458 458 """
459 459 for line in tokens_by_line:
460 460 # Last token is NEWLINE; look at last but one
461 461 if len(line) > 2 and line[-2].string == '?':
462 462 # Find the first token that's not INDENT/DEDENT
463 463 ix = 0
464 464 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
465 465 ix += 1
466 466 return cls(line[ix].start, line[-2].start)
467 467
468 468 def transform(self, lines):
469 469 """Transform a help command found by the ``find()`` classmethod.
470 470 """
471 471 piece = ''.join(lines[self.start_line:self.q_line+1])
472 472 indent, content = piece[:self.start_col], piece[self.start_col:]
473 473 lines_before = lines[:self.start_line]
474 474 lines_after = lines[self.q_line + 1:]
475 475
476 476 m = _help_end_re.search(content)
477 477 if not m:
478 478 raise SyntaxError(content)
479 479 assert m is not None, content
480 480 target = m.group(1)
481 481 esc = m.group(3)
482 482
483 483 # If we're mid-command, put it back on the next prompt for the user.
484 484 next_input = None
485 485 if (not lines_before) and (not lines_after) \
486 486 and content.strip() != m.group(0):
487 487 next_input = content.rstrip('?\n')
488 488
489 489 call = _make_help_call(target, esc, next_input=next_input)
490 490 new_line = indent + call + '\n'
491 491
492 492 return lines_before + [new_line] + lines_after
493 493
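A sketch of the help-end rewrite: ``?`` maps to the ``pinfo`` magic and ``??`` to ``pinfo2``:

    src = ["foo.bar?\n"]
    HelpEnd.find(make_tokens_by_line(src)).transform(src)
    # -> ["get_ipython().run_line_magic('pinfo', 'foo.bar')\n"]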
494 494 def make_tokens_by_line(lines:List[str]):
495 495 """Tokenize a series of lines and group tokens by line.
496 496
497 497 The tokens for a multiline Python string or expression are grouped as one
498 498 line. All lines except the last should keep their line ending ('\\n',
499 499 '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`,
500 500 for example, when passing a block of text to this function.
501 501
502 502 """
503 503 # NL tokens are used inside multiline expressions, but also after blank
504 504 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
505 505 # We want to group the former case together but split the latter, so we
506 506 # track parentheses level, similar to the internals of tokenize.
507 507
508 508 # reexported from token on 3.7+
509 509 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
510 510 tokens_by_line:List[List[Any]] = [[]]
511 511 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
512 512 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
513 513 parenlev = 0
514 514 try:
515 515 for token in tokenize.generate_tokens(iter(lines).__next__):
516 516 tokens_by_line[-1].append(token)
517 517 if (token.type == NEWLINE) \
518 518 or ((token.type == NL) and (parenlev <= 0)):
519 519 tokens_by_line.append([])
520 520 elif token.string in {'(', '[', '{'}:
521 521 parenlev += 1
522 522 elif token.string in {')', ']', '}'}:
523 523 if parenlev > 0:
524 524 parenlev -= 1
525 525 except tokenize.TokenError:
526 526 # Input ended in a multiline string or expression. That's OK for us.
527 527 pass
528 528
529 529
530 530 if not tokens_by_line[-1]:
531 531 tokens_by_line.pop()
532 532
533 533
534 534 return tokens_by_line
535 535
536 536
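For instance, a parenthesised expression spanning two physical lines ends up grouped as one logical line (a sketch; the final group holds only the ENDMARKER token):

    tbl = make_tokens_by_line(["a = (1 +\n", "     2)\n"])
    [t.string for t in tbl[0]]
    # -> ['a', '=', '(', '1', '+', '\n', '2', ')', '\n']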
537 537 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
538 538 """Check if the depth of brackets in the list of tokens drops below 0"""
539 539 parenlev = 0
540 540 for token in tokens:
541 541 if token.string in {"(", "[", "{"}:
542 542 parenlev += 1
543 543 elif token.string in {")", "]", "}"}:
544 544 parenlev -= 1
545 545 if parenlev < 0:
546 546 return True
547 547 return False
548 548
549 549
550 550 def show_linewise_tokens(s: str):
551 551 """For investigation and debugging"""
552 552 if not s.endswith('\n'):
553 553 s += '\n'
554 554 lines = s.splitlines(keepends=True)
555 555 for line in make_tokens_by_line(lines):
556 556 print("Line -------")
557 557 for tokinfo in line:
558 558 print(" ", tokinfo)
559 559
560 560 # Arbitrary limit to prevent getting stuck in infinite loops
561 561 TRANSFORM_LOOP_LIMIT = 500
562 562
563 563 class TransformerManager:
564 564 """Applies various transformations to a cell or code block.
565 565
566 566 The key methods for external use are ``transform_cell()``
567 567 and ``check_complete()``.
568 568 """
569 569 def __init__(self):
570 570 self.cleanup_transforms = [
571 571 leading_empty_lines,
572 572 leading_indent,
573 573 classic_prompt,
574 574 ipython_prompt,
575 575 ]
576 576 self.line_transforms = [
577 577 cell_magic,
578 578 ]
579 579 self.token_transformers = [
580 580 MagicAssign,
581 581 SystemAssign,
582 582 EscapedCommand,
583 583 HelpEnd,
584 584 ]
585 585
586 586 def do_one_token_transform(self, lines):
587 587 """Find and run the transform earliest in the code.
588 588
589 589 Returns (changed, lines).
590 590
591 591 This method is called repeatedly until changed is False, indicating
592 592 that all available transformations are complete.
593 593
594 594 The tokens following IPython special syntax might not be valid, so
595 595 the transformed code is retokenised every time to identify the next
596 596 piece of special syntax. Hopefully long code cells are mostly valid
597 597 Python, not using lots of IPython special syntax, so this shouldn't be
598 598 a performance issue.
599 599 """
600 600 tokens_by_line = make_tokens_by_line(lines)
601 601 candidates = []
602 602 for transformer_cls in self.token_transformers:
603 603 transformer = transformer_cls.find(tokens_by_line)
604 604 if transformer:
605 605 candidates.append(transformer)
606 606
607 607 if not candidates:
608 608 # Nothing to transform
609 609 return False, lines
610 610 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
611 611 for transformer in ordered_transformers:
612 612 try:
613 613 return True, transformer.transform(lines)
614 614 except SyntaxError:
615 615 pass
616 616 return False, lines
617 617
618 618 def do_token_transforms(self, lines):
619 619 for _ in range(TRANSFORM_LOOP_LIMIT):
620 620 changed, lines = self.do_one_token_transform(lines)
621 621 if not changed:
622 622 return lines
623 623
624 624 raise RuntimeError("Input transformation still changing after "
625 625 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
626 626
627 627 def transform_cell(self, cell: str) -> str:
628 628 """Transforms a cell of input code"""
629 629 if not cell.endswith('\n'):
630 630 cell += '\n' # Ensure the cell has a trailing newline
631 631 lines = cell.splitlines(keepends=True)
632 632 for transform in self.cleanup_transforms + self.line_transforms:
633 633 lines = transform(lines)
634 634
635 635 lines = self.do_token_transforms(lines)
636 636 return ''.join(lines)
637 637
638 638 def check_complete(self, cell: str):
639 639 """Return whether a block of code is ready to execute, or should be continued
640 640
641 641 Parameters
642 642 ----------
643 643 cell : str
644 644 Python input code, which can be multiline.
645 645
646 646 Returns
647 647 -------
648 648 status : str
649 649 One of 'complete', 'incomplete', or 'invalid' if source is not a
650 650 prefix of valid code.
651 651 indent_spaces : int or None
652 652 The number of spaces by which to indent the next line of code. If
653 653 status is not 'incomplete', this is None.
654 654 """
655 655 # Remember whether the cell ends in a newline.
656 656 ends_with_newline = False
657 657 for character in reversed(cell):
658 658 if character == '\n':
659 659 ends_with_newline = True
660 660 break
661 661 elif character.strip():
662 662 break
663 663 else:
664 664 continue
665 665
666 666 if not ends_with_newline:
667 667 # Append a newline for consistent tokenization
668 668 # See https://bugs.python.org/issue33899
669 669 cell += '\n'
670 670
671 671 lines = cell.splitlines(keepends=True)
672 672
673 673 if not lines:
674 674 return 'complete', None
675 675
676 676 if lines[-1].endswith('\\'):
677 677 # Explicit backslash continuation
678 678 return 'incomplete', find_last_indent(lines)
679 679
680 680 try:
681 681 for transform in self.cleanup_transforms:
682 682 if not getattr(transform, 'has_side_effects', False):
683 683 lines = transform(lines)
684 684 except SyntaxError:
685 685 return 'invalid', None
686 686
687 687 if lines[0].startswith('%%'):
688 688 # Special case for cell magics - completion marked by blank line
689 689 if lines[-1].strip():
690 690 return 'incomplete', find_last_indent(lines)
691 691 else:
692 692 return 'complete', None
693 693
694 694 try:
695 695 for transform in self.line_transforms:
696 696 if not getattr(transform, 'has_side_effects', False):
697 697 lines = transform(lines)
698 698 lines = self.do_token_transforms(lines)
699 699 except SyntaxError:
700 700 return 'invalid', None
701 701
702 702 tokens_by_line = make_tokens_by_line(lines)
703 703
704 704 # Bail if we got one line and there are more closing parentheses than
705 705 # the opening ones
706 706 if (
707 707 len(lines) == 1
708 708 and tokens_by_line
709 709 and has_sunken_brackets(tokens_by_line[0])
710 710 ):
711 711 return "invalid", None
712 712
713 713 if not tokens_by_line:
714 714 return 'incomplete', find_last_indent(lines)
715 715
716 716 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
717 717 # We're in a multiline string or expression
718 718 return 'incomplete', find_last_indent(lines)
719 719
720 720 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
721 721
722 722 # Pop the last line which only contains DEDENTs and ENDMARKER
723 723 last_token_line = None
724 724 if {t.type for t in tokens_by_line[-1]} in [
725 725 {tokenize.DEDENT, tokenize.ENDMARKER},
726 726 {tokenize.ENDMARKER}
727 727 ] and len(tokens_by_line) > 1:
728 728 last_token_line = tokens_by_line.pop()
729 729
730 730 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
731 731 tokens_by_line[-1].pop()
732 732
733 733 if not tokens_by_line[-1]:
734 734 return 'incomplete', find_last_indent(lines)
735 735
736 736 if tokens_by_line[-1][-1].string == ':':
737 737 # The last line starts a block (e.g. 'if foo:')
738 738 ix = 0
739 739 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
740 740 ix += 1
741 741
742 742 indent = tokens_by_line[-1][ix].start[1]
743 743 return 'incomplete', indent + 4
744 744
745 745 if tokens_by_line[-1][0].line.endswith('\\'):
746 746 return 'incomplete', None
747 747
748 748 # At this point, our checks think the code is complete (or invalid).
749 749 # We'll use codeop.compile_command to check this with the real parser
750 750 try:
751 751 with warnings.catch_warnings():
752 752 warnings.simplefilter('error', SyntaxWarning)
753 753 res = compile_command(''.join(lines), symbol='exec')
754 754 except (SyntaxError, OverflowError, ValueError, TypeError,
755 755 MemoryError, SyntaxWarning):
756 756 return 'invalid', None
757 757 else:
758 758 if res is None:
759 759 return 'incomplete', find_last_indent(lines)
760 760
761 761 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
762 762 if ends_with_newline:
763 763 return 'complete', None
764 764 return 'incomplete', find_last_indent(lines)
765 765
766 766 # If there's a blank line at the end, assume we're ready to execute
767 767 if not lines[-1].strip():
768 768 return 'complete', None
769 769
770 770 return 'complete', None
771 771
772 772
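An end-to-end sketch of the two public entry points:

    tm = TransformerManager()
    tm.transform_cell("%time x = 1")
    # -> "get_ipython().run_line_magic('time', 'x = 1')\n"
    tm.check_complete("for i in range(3):")
    # -> ('incomplete', 4)
    tm.check_complete("x = 1")
    # -> ('complete', None)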
773 773 def find_last_indent(lines):
774 774 m = _indent_re.match(lines[-1])
775 775 if not m:
776 776 return 0
777 777 return len(m.group(0).replace('\t', ' '*4))
778 778
779 779
780 class MaybeAsyncCompile(Compile):
781 def __init__(self, extra_flags=0):
782 super().__init__()
783 self.flags |= extra_flags
784
785 __call__ = compile
786
787
780 788 class MaybeAsyncCommandCompiler(CommandCompiler):
781 789 def __init__(self, extra_flags=0):
782 self.compiler = self._compiler
783 self.extra_flags = extra_flags
790 self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
784 791
785 def _compiler(self, source, filename, symbol, flags, feature):
786 flags |= self.extra_flags
787 return compile(source, filename, symbol, flags, feature)
788 792
789 793 if (sys.version_info.major, sys.version_info.minor) >= (3, 8):
790 794 _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
791 795 else:
792 796 _extra_flags = ast.PyCF_ONLY_AST
793 797
794 798 compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
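A minimal sketch of how the module-level ``compile_command`` is used by ``check_complete`` above: it returns ``None`` when the parser thinks more input is coming and a compiled result once the statement is complete (the extra flags are intended to allow top-level ``await`` on Python 3.8+):

    compile_command("x = 1\n", symbol="exec")
    # -> a code object: the statement is complete
    compile_command("if True:\n", symbol="exec")
    # -> None: incomplete input, more lines expected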