##// END OF EJS Templates
Validate that ESC_PAREN ('/') is followed by a non-empty callable name
Guy Bortnikov -
Show More
@@ -1,827 +1,830
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 import ast
14 14 from codeop import CommandCompiler, Compile
15 15 import re
16 16 import sys
17 17 import tokenize
18 18 from typing import List, Tuple, Optional, Any
19 19 import warnings
20 20
21 21 from IPython.utils import tokenutil
22 22
# Matches the run of spaces and/or tabs at the very start of a string.
_indent_re = re.compile(r'^[ \t]+')
24 24
def leading_empty_lines(lines):
    """Drop blank lines from the start of a cell.

    Lines that are empty or consist solely of whitespace are discarded until
    the first line with real content is reached. If there is no such line
    (including when *lines* is empty), the input is returned unchanged.
    """
    first_content = next(
        (idx for idx, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_content is None:
        # Nothing but blank lines (or nothing at all): leave the input alone.
        return lines
    return lines[first_content:]
37 37
def leading_indent(lines):
    """Strip the first line's indentation from every line of the cell.

    The leading spaces/tabs of the first line are taken as the prefix to
    remove. Each line that starts with exactly that prefix has it cut off;
    lines that do not start with it are passed through untouched. When the
    first line is not indented (or *lines* is empty) the input is returned
    as is.
    """
    if not lines:
        return lines
    found = _indent_re.match(lines[0])
    if not found:
        return lines
    prefix = found.group(0)
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
53 53
class PromptStripper:
    """Strip input prompts from the lines of a pasted block.

    Parameters
    ----------
    prompt_re : regular expression
        Pattern matching any input prompt, continuation prompts
        (e.g. ``...``) included.
    initial_re : regular expression, optional
        Pattern matching only the first-line prompt. Falls back to
        *prompt_re* when omitted. Mostly useful for plain Python prompts
        (``>>>``): the continuation prompt ``...`` is itself a valid Python 3
        expression, so it must not be stripped on its own.

    Notes
    -----

    The first line is tested against *initial_re*; the second line (if any)
    is tested against *prompt_re*. If either matches, one leading prompt is
    removed from every line of the block; otherwise the block is returned
    unchanged.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        remove_prompt = self.prompt_re.sub
        stripped = []
        for text in lines:
            stripped.append(remove_prompt('', text, count=1))
        return stripped

    def __call__(self, lines):
        if not lines:
            return lines
        if not self.initial_re.match(lines[0]):
            # No prompt on the first line; give the second line a chance.
            if len(lines) <= 1 or not self.prompt_re.match(lines[1]):
                return lines
        return self._strip(lines)
90 90
# Stripper for plain Python prompts: ``>>>`` on the first line, ``>>>`` or
# ``...`` on any line once stripping has been triggered.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Stripper for IPython-style prompts (``In [N]: `` and ``...: ``). No separate
# initial_re is given, so the same pattern is used for the first line too.
ipython_prompt = PromptStripper(
    re.compile(
        r"""
        ^(                         # Match from the beginning of a line, either:

                                   # 1. First-line prompt:
        ((\[nav\]|\[ins\])?\ )?    # Vi editing mode prompt, if it's there
        In\                        # The 'In' of the prompt, with a space
        \[\d+\]:                   # Command index, as displayed in the prompt
        \                          # With a mandatory trailing space

        |                          # ... or ...

                                   # 2. The three dots of the multiline prompt
        \s*                        # All leading whitespace characters
        \.{3,}:                    # The three (or more) dots
        \ ?                        # With an optional trailing space

        )
        """,
        re.VERBOSE,
    )
)
119 119
120 120
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Input whose first line does not start with ``%%`` is returned unchanged,
    as is a ``%%name?`` help request (left for the help transformer).
    Otherwise the whole cell collapses to one line calling
    ``get_ipython().run_cell_magic(name, first_line_args, body)``.
    """
    header = lines[0] if lines else ''
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    template = 'get_ipython().run_cell_magic(%r, %r, %r)\n'
    return [template % (magic_name, first_line, body)]
131 131
132 132
133 133 def _find_assign_op(token_line) -> Optional[int]:
134 134 """Get the index of the first assignment in the line ('=' not inside brackets)
135 135
136 136 Note: We don't try to support multiple special assignment (a = b = %foo)
137 137 """
138 138 paren_level = 0
139 139 for i, ti in enumerate(token_line):
140 140 s = ti.string
141 141 if s == '=' and paren_level == 0:
142 142 return i
143 143 if s in {'(','[','{'}:
144 144 paren_level += 1
145 145 elif s in {')', ']', '}'}:
146 146 if paren_level > 0:
147 147 paren_level -= 1
148 148 return None
149 149
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Starting at *start_line* (0-indexed), advance while the current line ends
    with a backslash followed by a newline. The index reached is returned;
    if every line up to the end of *lines* is continued, that index equals
    ``len(lines)``.
    """
    total = len(lines)
    cursor = start_line
    while True:
        if not lines[cursor].endswith('\\\n'):
            return cursor
        cursor += 1
        if cursor >= total:
            return cursor
161 161
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued line into one string.

    A continued line is made of every physical line ending in ``\`` plus the
    line that follows the last ``\``. For instance::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    consists of four pieces, which assemble to::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    Line numbers are 0-indexed and *start* is ``(lineno, colno)``; text
    before ``colno`` on the first line is left out. Pieces are joined with a
    single space.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_row, first_col = start[0], start[1]
    pieces = [lines[first_row][first_col:]]
    pieces.extend(lines[first_row + 1:end_line + 1])

    # Every piece but the last loses its trailing backslash + newline ...
    trimmed = [piece.rstrip()[:-1] for piece in pieces[:-1]]
    # ... while the last one only loses its trailing whitespace/newline.
    trimmed.append(pieces[-1].rstrip())
    return ' '.join(trimmed)
188 188
class TokenTransformBase:
    """Common base for transformations driven by Python tokens.

    IPython's special syntax must be left alone inside strings and comments,
    which regexes cannot reliably guarantee. The code is therefore tokenised
    as Python and the special syntax is recognised in the token stream.

    Because that syntax is not valid Python, tokenising may go astray once it
    starts. Each transformer thus finds and rewrites *one* occurrence at a
    time into regular Python; tokens are regenerated after every rewrite to
    look for the next occurrence.

    Subclasses provide a class method (``find``) and an instance method
    (``transform``).

    When several transformers match at the same position, ``priority``
    decides which one is applied: lower numbers win. This is what lets
    "%magic?" become a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        line_1based, column = start[0], start[1]
        self.start_line = line_1based - 1   # Shift from 1-index to 0-index
        self.start_col = column

    def sortby(self):
        """Sort key: position in the source first, then priority."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Locate one occurrence of the special syntax in the tokens.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        one inner list per logical line, which makes it easy to inspect e.g.
        the first token of each line.

        Implementations return an instance of the class pointing at the
        start position found, or None when there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Rewrite the occurrence located by ``find()``.

        Receives the physical lines as a list of strings and returns a
        list of the same kind with the transformation applied.
        """
        raise NotImplementedError
239 239
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None if no
        logical line has a top-level ``=`` directly followed by ``%`` and a
        NAME token.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after '=' are inspected, so the line must hold at
            # least assign_ix + 3 tokens for both indexes to be valid.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text from the ``%`` up to the end of the (possibly
        backslash-continued) line is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call; everything left of
        the ``%`` on the first line is kept verbatim.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        # All physical lines that made up the continued line are replaced
        # by the single rewritten line.
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
270 270
271 271
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find_pre_312(cls, tokens_by_line):
        """Finder used on Python < 3.12, where ``!`` is an ERRORTOKEN."""
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if line[assign_ix].line.strip().startswith('='):
                continue
            if len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens after '=': whitespace may come
            # before the '!', anything else ends the search on this line.
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    @classmethod
    def find_post_312(cls, tokens_by_line):
        """Finder used on Python >= 3.12, where ``!`` is an OP token."""
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if line[assign_ix].line.strip().startswith("="):
                continue
            if len(line) < assign_ix + 2:
                continue
            candidate = line[assign_ix + 1]
            if candidate.type == tokenize.OP and candidate.string == "!":
                return cls(candidate.start)

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell."""
        finder = cls.find_pre_312 if sys.version_info < (3, 12) else cls.find_post_312
        return finder(tokens_by_line)

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The ``!command`` part (which may span backslash-continued lines) is
        replaced by a ``get_ipython().getoutput(command)`` call.
        """
        row, col = self.start_line, self.start_col

        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        # Keep whatever precedes the '!' on the first line (the "a = " part).
        prefix = lines[row][:col]
        replacement = prefix + "get_ipython().getoutput({!r})".format(rhs[1:]) + '\n'

        return lines[:row] + [replacement] + lines[last_row + 1:]
328 328
# Escape sequences defining the syntax transformations IPython applies to
# user input. They must NOT simply be edited here: regular expressions and
# other code may rely on the hardcoded values, and for all practical purposes
# they *are* the 'IPython syntax' -- treat them as fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# One-character escapes recognised at the start of a line.
ESCAPE_SINGLES = {
    ESC_SHELL,
    ESC_HELP,
    ESC_MAGIC,
    ESC_QUOTE,
    ESC_QUOTE2,
    ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
347 347
348 348 def _make_help_call(target, esc):
349 349 """Prepares a pinfo(2)/psearch call from a target name and the escape
350 350 (i.e. ? or ??)"""
351 351 method = 'pinfo2' if esc == '??' \
352 352 else 'psearch' if '*' in target \
353 353 else 'pinfo'
354 354 arg = " ".join([method, target])
355 355 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
356 356 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
357 357 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
358 358 return "get_ipython().run_line_magic(%r, %r)" % (t_magic_name, t_magic_arg_s)
359 359
360 360
361 361 def _tr_help(content):
362 362 """Translate lines escaped with: ?
363 363
364 364 A naked help line should fire the intro help screen (shell.show_usage())
365 365 """
366 366 if not content:
367 367 return 'get_ipython().show_usage()'
368 368
369 369 return _make_help_call(content, '?')
370 370
371 371 def _tr_help2(content):
372 372 """Translate lines escaped with: ??
373 373
374 374 A naked help line should fire the intro help screen (shell.show_usage())
375 375 """
376 376 if not content:
377 377 return 'get_ipython().show_usage()'
378 378
379 379 return _make_help_call(content, '??')
380 380
381 381 def _tr_magic(content):
382 382 "Translate lines escaped with a percent sign: %"
383 383 name, _, args = content.partition(' ')
384 384 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
385 385
386 386 def _tr_quote(content):
387 387 "Translate lines escaped with a comma: ,"
388 388 name, _, args = content.partition(' ')
389 389 return '%s("%s")' % (name, '", "'.join(args.split()) )
390 390
391 391 def _tr_quote2(content):
392 392 "Translate lines escaped with a semicolon: ;"
393 393 name, _, args = content.partition(' ')
394 394 return '%s("%s")' % (name, args)
395 395
396 396 def _tr_paren(content):
397 397 "Translate lines escaped with a slash: /"
398 name, _, args = content.partition(' ')
398 name, _, args = content.partition(" ")
399 if name == "":
400 raise SyntaxError(f'"{ESC_SHELL}" must be followed by a callable name')
401
399 402 return '%s(%s)' % (name, ", ".join(args.split()))
400 403
# Dispatch table: escape sequence -> callable that takes the text following
# the escape and returns the replacement Python source.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
409 412
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Only the first token of each logical line that is not an
        INDENT/DEDENT is examined.
        """
        layout_types = (tokenize.INDENT, tokenize.DEDENT)
        for line in tokens_by_line:
            first = next((tok for tok in line if tok.type not in layout_types), None)
            if first is not None and first.string in ESCAPE_SINGLES:
                return cls(first.start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escape (one or two characters) selects a translator from ``tr``;
        the rest of the (possibly backslash-continued) line is its input.
        The original indentation is preserved.
        """
        row, col = self.start_line, self.start_col

        last_row = find_end_of_continued_line(lines, row)
        line = assemble_continued_line(lines, (row, col), last_row)

        # Two-character escapes take precedence over one-character ones.
        width = 2 if line[:2] in ESCAPE_DOUBLES else 1
        escape, content = line[:width], line[width:]

        translator = tr.get(escape)
        call = translator(content) if translator is not None else ''

        indent = lines[row][:col]
        return lines[:row] + [indent + call + '\n'] + lines[last_row + 1:]
452 455
453 456
# Matches a help target at the end of a string. Group 1 is the target
# (optionally prefixed by up to two '%'), group 3 is the '?' or '??' suffix.
_help_end_re = re.compile(
    r"""(%{0,2}
    (?!\d)[\w*]+            # Variable name
    (\.(?!\d)[\w*]+|\[-?[0-9]+\])*  # .etc.etc or [0], we only support literal integers.
    )
    (\?\??)$                # ? or ??
    """,
    re.VERBOSE,
)
463 466
464 467
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        q_row_1based, q_column = q_locn[0], q_locn[1]
        self.q_line = q_row_1based - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_column

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A logical line qualifies when its last-but-one token is ``?``.
        """
        layout_types = (tokenize.INDENT, tokenize.DEDENT)
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            # First token that's not INDENT/DEDENT marks the start
            first = next(tok for tok in line if tok.type not in layout_types)
            return cls(first.start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text before the ``?`` is not something
        the help pattern recognises.
        """
        first_row, last_row = self.start_line, self.q_line

        piece = "".join(lines[first_row : last_row + 1])
        indent = piece[: self.start_col]
        content = piece[self.start_col :]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        assert m is not None, content

        call = _make_help_call(m.group(1), m.group(3))
        new_line = indent + call + '\n'

        return lines[:first_row] + [new_line] + lines[last_row + 1 :]
510 513
def make_tokens_by_line(lines:List[str]):
    """Tokenize *lines* and group the tokens by logical line.

    All tokens belonging to a multiline Python string or bracketed expression
    end up in the same group. Every line except possibly the last should
    still carry its line ending for this to work properly; use
    ``str.splitlines(keepends=True)`` to split a block of text before passing
    it to this function. A warning is emitted when the first of several lines
    has no line ending.

    Returns a list of lists of tokens; a trailing empty group is dropped.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    grouped: List[List[Any]] = [[]]
    line_endings = ("\n", "\r", "\r\n", "\x0b", "\x0c")
    if len(lines) > 1 and not lines[0].endswith(line_endings):
        warnings.warn(
            "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
            stacklevel=2,
        )
    depth = 0
    try:
        token_stream = tokenutil.generate_tokens_catch_errors(
            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
        )
        for tok in token_stream:
            grouped[-1].append(tok)
            closes_group = tok.type == NEWLINE or (tok.type == NL and depth <= 0)
            if closes_group:
                grouped.append([])
            elif tok.string in ('(', '[', '{'):
                depth += 1
            elif tok.string in (')', ']', '}'):
                # Never let stray closing brackets push the depth below zero.
                depth = max(depth - 1, 0)
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not grouped[-1]:
        grouped.pop()

    return grouped
557 560
558 561
def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
    """Tell whether bracket nesting ever goes negative in *tokens*.

    Returns True as soon as a closing bracket appears without a matching
    opener before it, False if that never happens.
    """
    depth = 0
    for tok in tokens:
        text = tok.string
        if text in ("(", "[", "{"):
            depth += 1
            continue
        if text not in (")", "]", "}"):
            continue
        depth -= 1
        if depth < 0:
            return True
    return False
570 573
571 574
def show_linewise_tokens(s: str):
    """Print the tokens of *s* grouped by logical line (debugging aid).

    Deprecated: a DeprecationWarning is emitted on every call.
    """
    warnings.warn(
        "show_linewise_tokens is deprecated since IPython 8.6",
        DeprecationWarning,
        stacklevel=2,
    )
    # Make sure the text ends with a newline before splitting it up.
    text = s if s.endswith("\n") else s + "\n"
    for token_line in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_line:
            print(" ", tokinfo)
586 589
# Arbitrary limit to prevent getting stuck in infinite loops: the maximum
# number of token-transform passes attempted on one cell before giving up.
TRANSFORM_LOOP_LIMIT = 500
589 592
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Callables taking and returning a list of lines; run first.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Callables taking and returning a list of lines; run after cleanup.
        self.line_transforms = [
            cell_magic,
        ]
        # Classes offering find()/transform(); applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest position first; priority breaks ties at the same position.
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until nothing changes any more.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        # Scan backwards over trailing whitespace: a newline found before any
        # non-whitespace character means the cell ends with a newline.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        # If the last non-blank line ends with a backslash, more input is
        # expected.
        for line in reversed(lines):
            if not line.strip():
                continue
            elif line.strip("\n").endswith("\\"):
                return "incomplete", find_last_indent(lines)
            else:
                break

        try:
            for transform in self.cleanup_transforms:
                # Transforms flagged with has_side_effects are skipped here.
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if (
            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
        ):
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Strip trailing NEWLINE/COMMENT/ENDMARKER tokens so the last
        # remaining token is the last meaningful one.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            # Indent the next line 4 columns past the block opener's column.
            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                # Turn SyntaxWarning into an exception so it is caught below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        # NOTE(review): this branch returns the same value as the fall-through
        # below, so the check currently has no effect on the result.
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
805 808
806 809
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    found = _indent_re.match(lines[-1])
    if not found:
        return 0
    whitespace = found.group(0)
    # Each tab counts as four columns: one for itself plus three extra.
    return len(whitespace) + 3 * whitespace.count('\t')
812 815
813 816
class MaybeAsyncCompile(Compile):
    """``codeop.Compile`` variant whose compile flags include extra bits."""

    def __init__(self, extra_flags=0):
        super().__init__()
        # Add the requested bits on top of the flags set by the base class.
        self.flags = self.flags | extra_flags
818 821
819 822
class MaybeAsyncCommandCompiler(CommandCompiler):
    """``codeop.CommandCompiler`` that compiles through MaybeAsyncCompile."""

    def __init__(self, extra_flags=0):
        # The base-class __init__ is not called; only the ``compiler``
        # attribute is set, to a MaybeAsyncCompile carrying the extra flags.
        self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
823 826
824 827
# Extra compiler flag so that top-level ``await`` is accepted.
_extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT

# Module-level command compiler used by TransformerManager.check_complete().
compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
General Comments 0
You need to be logged in to leave comments. Login now