Remove show_linewise_tokens deprecated in 8.6 (#14571)
M Bussonnier
r29004:63ea325d merge
@@ -1,830 +1,814
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 import ast
14 14 from codeop import CommandCompiler, Compile
15 15 import re
16 16 import sys
17 17 import tokenize
18 18 from typing import List, Tuple, Optional, Any
19 19 import warnings
20 20
21 21 from IPython.utils import tokenutil
22 22
23 23 _indent_re = re.compile(r'^[ \t]+')
24 24
25 25 def leading_empty_lines(lines):
26 26 """Remove leading empty lines
27 27
28 28 If the leading lines are empty or contain only whitespace, they will be
29 29 removed.
30 30 """
31 31 if not lines:
32 32 return lines
33 33 for i, line in enumerate(lines):
34 34 if line and not line.isspace():
35 35 return lines[i:]
36 36 return lines
37 37
38 38 def leading_indent(lines):
39 39 """Remove leading indentation.
40 40
41 41 If the first line starts with spaces or tabs, the same whitespace will be
42 42 removed from each following line in the cell.
43 43 """
44 44 if not lines:
45 45 return lines
46 46 m = _indent_re.match(lines[0])
47 47 if not m:
48 48 return lines
49 49 space = m.group(0)
50 50 n = len(space)
51 51 return [l[n:] if l.startswith(space) else l
52 52 for l in lines]
53 53
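# Example (illustrative sketch): the effect of the two cleanup helpers above on
# hypothetical pasted input; the lists below are invented, not data from this module.
#
#   leading_empty_lines(["\n", "  \n", "x = 1\n"])
#   # -> ["x = 1\n"]
#
#   leading_indent(["    a = 1\n", "    b = 2\n"])
#   # -> ["a = 1\n", "b = 2\n"]
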
54 54 class PromptStripper:
55 55 """Remove matching input prompts from a block of input.
56 56
57 57 Parameters
58 58 ----------
59 59 prompt_re : regular expression
60 60 A regular expression matching any input prompt (including continuation,
61 61 e.g. ``...``)
62 62 initial_re : regular expression, optional
63 63 A regular expression matching only the initial prompt, but not continuation.
64 64 If no initial expression is given, prompt_re will be used everywhere.
65 65 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
66 66 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
67 67
68 68 Notes
69 69 -----
70 70
71 71 If initial_re and prompt_re differ,
72 72 only initial_re will be tested against the first line.
73 73 If any prompt is found on the first two lines,
74 74 prompts will be stripped from the rest of the block.
75 75 """
76 76 def __init__(self, prompt_re, initial_re=None):
77 77 self.prompt_re = prompt_re
78 78 self.initial_re = initial_re or prompt_re
79 79
80 80 def _strip(self, lines):
81 81 return [self.prompt_re.sub('', l, count=1) for l in lines]
82 82
83 83 def __call__(self, lines):
84 84 if not lines:
85 85 return lines
86 86 if self.initial_re.match(lines[0]) or \
87 87 (len(lines) > 1 and self.prompt_re.match(lines[1])):
88 88 return self._strip(lines)
89 89 return lines
90 90
91 91 classic_prompt = PromptStripper(
92 92 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
93 93 initial_re=re.compile(r'^>>>( |$)')
94 94 )
95 95
96 96 ipython_prompt = PromptStripper(
97 97 re.compile(
98 98 r"""
99 99 ^( # Match from the beginning of a line, either:
100 100
101 101 # 1. First-line prompt:
102 102 ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
103 103 In\ # The 'In' of the prompt, with a space
104 104 \[\d+\]: # Command index, as displayed in the prompt
105 105 \ # With a mandatory trailing space
106 106
107 107 | # ... or ...
108 108
109 109 # 2. The three dots of the multiline prompt
110 110 \s* # All leading whitespace characters
111 111 \.{3,}: # The three (or more) dots
112 112 \ ? # With an optional trailing space
113 113
114 114 )
115 115 """,
116 116 re.VERBOSE,
117 117 )
118 118 )
119 119
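# Example (illustrative sketch): how the two prompt strippers defined above
# behave on hypothetical pasted sessions; the input lists are invented here.
#
#   classic_prompt([">>> a = 1\n", "... b = 2\n"])
#   # -> ["a = 1\n", "b = 2\n"]
#
#   ipython_prompt(["In [1]: x = 1\n", "   ...: y = 2\n"])
#   # -> ["x = 1\n", "y = 2\n"]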
120 120
121 121 def cell_magic(lines):
122 122 if not lines or not lines[0].startswith('%%'):
123 123 return lines
124 124 if re.match(r'%%\w+\?', lines[0]):
125 125 # This case will be handled by help_end
126 126 return lines
127 127 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
128 128 body = ''.join(lines[1:])
129 129 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
130 130 % (magic_name, first_line, body)]
131 131
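# Example (illustrative sketch): a hypothetical ``%%timeit`` cell rewritten by
# cell_magic into a single run_cell_magic call.
#
#   cell_magic(["%%timeit -n1\n", "sum(range(10))\n"])
#   # -> ["get_ipython().run_cell_magic('timeit', '-n1', 'sum(range(10))\\n')\n"]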
132 132
133 133 def _find_assign_op(token_line) -> Optional[int]:
134 134 """Get the index of the first assignment in the line ('=' not inside brackets)
135 135
136 136 Note: We don't try to support multiple special assignments (a = b = %foo)
137 137 """
138 138 paren_level = 0
139 139 for i, ti in enumerate(token_line):
140 140 s = ti.string
141 141 if s == '=' and paren_level == 0:
142 142 return i
143 143 if s in {'(','[','{'}:
144 144 paren_level += 1
145 145 elif s in {')', ']', '}'}:
146 146 if paren_level > 0:
147 147 paren_level -= 1
148 148 return None
149 149
150 150 def find_end_of_continued_line(lines, start_line: int):
151 151 """Find the last line of a line explicitly extended using backslashes.
152 152
153 153 Uses 0-indexed line numbers.
154 154 """
155 155 end_line = start_line
156 156 while lines[end_line].endswith('\\\n'):
157 157 end_line += 1
158 158 if end_line >= len(lines):
159 159 break
160 160 return end_line
161 161
162 162 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
163 163 r"""Assemble a single line from multiple continued line pieces
164 164
165 165 Continued lines are lines ending in ``\``, and the line following the last
166 166 ``\`` in the block.
167 167
168 168 For example, this code continues over multiple lines::
169 169
170 170 if (assign_ix is not None) \
171 171 and (len(line) >= assign_ix + 2) \
172 172 and (line[assign_ix+1].string == '%') \
173 173 and (line[assign_ix+2].type == tokenize.NAME):
174 174
175 175 This statement contains four continued line pieces.
176 176 Assembling these pieces into a single line would give::
177 177
178 178 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
179 179
180 180 This uses 0-indexed line numbers. *start* is (lineno, colno).
181 181
182 182 Used to allow ``%magic`` and ``!system`` commands to be continued over
183 183 multiple lines.
184 184 """
185 185 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
186 186 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
187 187 + [parts[-1].rstrip()]) # Strip newline from last line
188 188
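# Example (illustrative sketch): joining a hypothetical backslash-continued
# system command with the two helpers above, before it is transformed.
#
#   lines = ["!echo hello\\\n", "world\n"]
#   end = find_end_of_continued_line(lines, 0)   # -> 1
#   assemble_continued_line(lines, (0, 0), end)  # -> "!echo hello world"
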
189 189 class TokenTransformBase:
190 190 """Base class for transformations which examine tokens.
191 191
192 192 Special syntax should not be transformed when it occurs inside strings or
193 193 comments. This is hard to reliably avoid with regexes. The solution is to
194 194 tokenise the code as Python, and recognise the special syntax in the tokens.
195 195
196 196 IPython's special syntax is not valid Python syntax, so tokenising may go
197 197 wrong after the special syntax starts. These classes therefore find and
198 198 transform *one* instance of special syntax at a time into regular Python
199 199 syntax. After each transformation, tokens are regenerated to find the next
200 200 piece of special syntax.
201 201
202 202 Subclasses need to implement one class method (find)
203 203 and one regular method (transform).
204 204
205 205 The priority attribute can select which transformation to apply if multiple
206 206 transformers match in the same place. Lower numbers have higher priority.
207 207 This allows "%magic?" to be turned into a help call rather than a magic call.
208 208 """
209 209 # Lower numbers -> higher priority (for matches in the same location)
210 210 priority = 10
211 211
212 212 def sortby(self):
213 213 return self.start_line, self.start_col, self.priority
214 214
215 215 def __init__(self, start):
216 216 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
217 217 self.start_col = start[1]
218 218
219 219 @classmethod
220 220 def find(cls, tokens_by_line):
221 221 """Find one instance of special syntax in the provided tokens.
222 222
223 223 Tokens are grouped into logical lines for convenience,
224 224 so it is easy to e.g. look at the first token of each line.
225 225 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
226 226
227 227 This should return an instance of its class, pointing to the start
228 228 position it has found, or None if it found no match.
229 229 """
230 230 raise NotImplementedError
231 231
232 232 def transform(self, lines: List[str]):
233 233 """Transform one instance of special syntax found by ``find()``
234 234
235 235 Takes a list of strings representing physical lines,
236 236 returns a similar list of transformed lines.
237 237 """
238 238 raise NotImplementedError
239 239
240 240 class MagicAssign(TokenTransformBase):
241 241 """Transformer for assignments from magics (a = %foo)"""
242 242 @classmethod
243 243 def find(cls, tokens_by_line):
244 244 """Find the first magic assignment (a = %foo) in the cell.
245 245 """
246 246 for line in tokens_by_line:
247 247 assign_ix = _find_assign_op(line)
248 248 if (assign_ix is not None) \
249 249 and (len(line) >= assign_ix + 2) \
250 250 and (line[assign_ix+1].string == '%') \
251 251 and (line[assign_ix+2].type == tokenize.NAME):
252 252 return cls(line[assign_ix+1].start)
253 253
254 254 def transform(self, lines: List[str]):
255 255 """Transform a magic assignment found by the ``find()`` classmethod.
256 256 """
257 257 start_line, start_col = self.start_line, self.start_col
258 258 lhs = lines[start_line][:start_col]
259 259 end_line = find_end_of_continued_line(lines, start_line)
260 260 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
261 261 assert rhs.startswith('%'), rhs
262 262 magic_name, _, args = rhs[1:].partition(' ')
263 263
264 264 lines_before = lines[:start_line]
265 265 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
266 266 new_line = lhs + call + '\n'
267 267 lines_after = lines[end_line+1:]
268 268
269 269 return lines_before + [new_line] + lines_after
270 270
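# Example (illustrative sketch): end-to-end effect of MagicAssign on a
# hypothetical cell; make_tokens_by_line is defined later in this module.
#
#   lines = ["a = %sx ls\n"]
#   t = MagicAssign.find(make_tokens_by_line(lines))
#   t.transform(lines)
#   # -> ["a = get_ipython().run_line_magic('sx', 'ls')\n"]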
271 271
272 272 class SystemAssign(TokenTransformBase):
273 273 """Transformer for assignments from system commands (a = !foo)"""
274 274 @classmethod
275 275 def find_pre_312(cls, tokens_by_line):
276 276 for line in tokens_by_line:
277 277 assign_ix = _find_assign_op(line)
278 278 if (assign_ix is not None) \
279 279 and not line[assign_ix].line.strip().startswith('=') \
280 280 and (len(line) >= assign_ix + 2) \
281 281 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
282 282 ix = assign_ix + 1
283 283
284 284 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
285 285 if line[ix].string == '!':
286 286 return cls(line[ix].start)
287 287 elif not line[ix].string.isspace():
288 288 break
289 289 ix += 1
290 290
291 291 @classmethod
292 292 def find_post_312(cls, tokens_by_line):
293 293 for line in tokens_by_line:
294 294 assign_ix = _find_assign_op(line)
295 295 if (
296 296 (assign_ix is not None)
297 297 and not line[assign_ix].line.strip().startswith("=")
298 298 and (len(line) >= assign_ix + 2)
299 299 and (line[assign_ix + 1].type == tokenize.OP)
300 300 and (line[assign_ix + 1].string == "!")
301 301 ):
302 302 return cls(line[assign_ix + 1].start)
303 303
304 304 @classmethod
305 305 def find(cls, tokens_by_line):
306 306 """Find the first system assignment (a = !foo) in the cell."""
307 307 if sys.version_info < (3, 12):
308 308 return cls.find_pre_312(tokens_by_line)
309 309 return cls.find_post_312(tokens_by_line)
310 310
311 311 def transform(self, lines: List[str]):
312 312 """Transform a system assignment found by the ``find()`` classmethod.
313 313 """
314 314 start_line, start_col = self.start_line, self.start_col
315 315
316 316 lhs = lines[start_line][:start_col]
317 317 end_line = find_end_of_continued_line(lines, start_line)
318 318 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
319 319 assert rhs.startswith('!'), rhs
320 320 cmd = rhs[1:]
321 321
322 322 lines_before = lines[:start_line]
323 323 call = "get_ipython().getoutput({!r})".format(cmd)
324 324 new_line = lhs + call + '\n'
325 325 lines_after = lines[end_line + 1:]
326 326
327 327 return lines_before + [new_line] + lines_after
328 328
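# Example (illustrative sketch): the analogous rewrite performed by
# SystemAssign on a hypothetical cell; make_tokens_by_line is defined below.
#
#   lines = ["files = !ls\n"]
#   t = SystemAssign.find(make_tokens_by_line(lines))
#   t.transform(lines)
#   # -> ["files = get_ipython().getoutput('ls')\n"]
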
329 329 # The escape sequences that define the syntax transformations IPython will
330 330 # apply to user input. These can NOT be just changed here: many regular
331 331 # expressions and other parts of the code may use their hardcoded values, and
332 332 # for all intents and purposes they constitute the 'IPython syntax', so they
333 333 # should be considered fixed.
334 334
335 335 ESC_SHELL = '!' # Send line to underlying system shell
336 336 ESC_SH_CAP = '!!' # Send line to system shell and capture output
337 337 ESC_HELP = '?' # Find information about object
338 338 ESC_HELP2 = '??' # Find extra-detailed information about object
339 339 ESC_MAGIC = '%' # Call magic function
340 340 ESC_MAGIC2 = '%%' # Call cell-magic function
341 341 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
342 342 ESC_QUOTE2 = ';' # Quote all args as a single string, call
343 343 ESC_PAREN = '/' # Call first argument with rest of line as arguments
344 344
345 345 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
346 346 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
347 347
348 348 def _make_help_call(target, esc):
349 349 """Prepares a pinfo(2)/psearch call from a target name and the escape
350 350 (i.e. ? or ??)"""
351 351 method = 'pinfo2' if esc == '??' \
352 352 else 'psearch' if '*' in target \
353 353 else 'pinfo'
354 354 arg = " ".join([method, target])
355 355 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
356 356 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
357 357 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
358 358 return "get_ipython().run_line_magic(%r, %r)" % (t_magic_name, t_magic_arg_s)
359 359
360 360
361 361 def _tr_help(content):
362 362 """Translate lines escaped with: ?
363 363
364 364 A naked help line should fire the intro help screen (shell.show_usage())
365 365 """
366 366 if not content:
367 367 return 'get_ipython().show_usage()'
368 368
369 369 return _make_help_call(content, '?')
370 370
371 371 def _tr_help2(content):
372 372 """Translate lines escaped with: ??
373 373
374 374 A naked help line should fire the intro help screen (shell.show_usage())
375 375 """
376 376 if not content:
377 377 return 'get_ipython().show_usage()'
378 378
379 379 return _make_help_call(content, '??')
380 380
381 381 def _tr_magic(content):
382 382 "Translate lines escaped with a percent sign: %"
383 383 name, _, args = content.partition(' ')
384 384 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
385 385
386 386 def _tr_quote(content):
387 387 "Translate lines escaped with a comma: ,"
388 388 name, _, args = content.partition(' ')
389 389 return '%s("%s")' % (name, '", "'.join(args.split()) )
390 390
391 391 def _tr_quote2(content):
392 392 "Translate lines escaped with a semicolon: ;"
393 393 name, _, args = content.partition(' ')
394 394 return '%s("%s")' % (name, args)
395 395
396 396 def _tr_paren(content):
397 397 "Translate lines escaped with a slash: /"
398 398 name, _, args = content.partition(" ")
399 399 if name == "":
400 400 raise SyntaxError(f'"{ESC_PAREN}" must be followed by a callable name')
401 401
402 402 return '%s(%s)' % (name, ", ".join(args.split()))
403 403
404 404 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
405 405 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
406 406 ESC_HELP : _tr_help,
407 407 ESC_HELP2 : _tr_help2,
408 408 ESC_MAGIC : _tr_magic,
409 409 ESC_QUOTE : _tr_quote,
410 410 ESC_QUOTE2 : _tr_quote2,
411 411 ESC_PAREN : _tr_paren }
412 412
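# Example (illustrative sketch): what each escape in ``tr`` produces for a
# hypothetical line, once EscapedCommand (below) has split off the escape
# character(s).
#
#   tr['!']('ls -l')        # -> "get_ipython().system('ls -l')"
#   tr['%']('time x = 1')   # -> "get_ipython().run_line_magic('time', 'x = 1')"
#   tr[',']('fn a b c')     # -> 'fn("a", "b", "c")'
#   tr[';']('fn a b c')     # -> 'fn("a b c")'
#   tr['/']('fn a b')       # -> 'fn(a, b)'
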
413 413 class EscapedCommand(TokenTransformBase):
414 414 """Transformer for escaped commands like %foo, !foo, or /foo"""
415 415 @classmethod
416 416 def find(cls, tokens_by_line):
417 417 """Find the first escaped command (%foo, !foo, etc.) in the cell.
418 418 """
419 419 for line in tokens_by_line:
420 420 if not line:
421 421 continue
422 422 ix = 0
423 423 ll = len(line)
424 424 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
425 425 ix += 1
426 426 if ix >= ll:
427 427 continue
428 428 if line[ix].string in ESCAPE_SINGLES:
429 429 return cls(line[ix].start)
430 430
431 431 def transform(self, lines):
432 432 """Transform an escaped line found by the ``find()`` classmethod.
433 433 """
434 434 start_line, start_col = self.start_line, self.start_col
435 435
436 436 indent = lines[start_line][:start_col]
437 437 end_line = find_end_of_continued_line(lines, start_line)
438 438 line = assemble_continued_line(lines, (start_line, start_col), end_line)
439 439
440 440 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
441 441 escape, content = line[:2], line[2:]
442 442 else:
443 443 escape, content = line[:1], line[1:]
444 444
445 445 if escape in tr:
446 446 call = tr[escape](content)
447 447 else:
448 448 call = ''
449 449
450 450 lines_before = lines[:start_line]
451 451 new_line = indent + call + '\n'
452 452 lines_after = lines[end_line + 1:]
453 453
454 454 return lines_before + [new_line] + lines_after
455 455
456 456
457 457 _help_end_re = re.compile(
458 458 r"""(%{0,2}
459 459 (?!\d)[\w*]+ # Variable name
460 460 (\.(?!\d)[\w*]+|\[-?[0-9]+\])* # .etc.etc or [0], we only support literal integers.
461 461 )
462 462 (\?\??)$ # ? or ??
463 463 """,
464 464 re.VERBOSE,
465 465 )
466 466
467 467
468 468 class HelpEnd(TokenTransformBase):
469 469 """Transformer for help syntax: obj? and obj??"""
470 470 # This needs to be higher priority (lower number) than EscapedCommand so
471 471 # that inspecting magics (%foo?) works.
472 472 priority = 5
473 473
474 474 def __init__(self, start, q_locn):
475 475 super().__init__(start)
476 476 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
477 477 self.q_col = q_locn[1]
478 478
479 479 @classmethod
480 480 def find(cls, tokens_by_line):
481 481 """Find the first help command (foo?) in the cell.
482 482 """
483 483 for line in tokens_by_line:
484 484 # Last token is NEWLINE; look at last but one
485 485 if len(line) > 2 and line[-2].string == '?':
486 486 # Find the first token that's not INDENT/DEDENT
487 487 ix = 0
488 488 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
489 489 ix += 1
490 490 return cls(line[ix].start, line[-2].start)
491 491
492 492 def transform(self, lines):
493 493 """Transform a help command found by the ``find()`` classmethod.
494 494 """
495 495
496 496 piece = "".join(lines[self.start_line : self.q_line + 1])
497 497 indent, content = piece[: self.start_col], piece[self.start_col :]
498 498 lines_before = lines[: self.start_line]
499 499 lines_after = lines[self.q_line + 1 :]
500 500
501 501 m = _help_end_re.search(content)
502 502 if not m:
503 503 raise SyntaxError(content)
504 504 assert m is not None, content
505 505 target = m.group(1)
506 506 esc = m.group(3)
507 507
508 508
509 509 call = _make_help_call(target, esc)
510 510 new_line = indent + call + '\n'
511 511
512 512 return lines_before + [new_line] + lines_after
513 513
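# Example (illustrative sketch): the help calls produced for hypothetical
# trailing-? syntax once HelpEnd has isolated the target and the escape.
#
#   _make_help_call("os.path", "?")    # -> "get_ipython().run_line_magic('pinfo', 'os.path')"
#   _make_help_call("os.path", "??")   # -> "get_ipython().run_line_magic('pinfo2', 'os.path')"
#   _make_help_call("np.*load*", "?")  # -> "get_ipython().run_line_magic('psearch', 'np.*load*')"
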
514 514 def make_tokens_by_line(lines:List[str]):
515 515 """Tokenize a series of lines and group tokens by line.
516 516
517 517 The tokens for a multiline Python string or expression are grouped as one
518 518 line. All lines except the last should keep their line ending ('\\n',
519 519 '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`,
520 520 for example, when passing a block of text to this function.
521 521
522 522 """
523 523 # NL tokens are used inside multiline expressions, but also after blank
524 524 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
525 525 # We want to group the former case together but split the latter, so we
526 526 # track parentheses level, similar to the internals of tokenize.
527 527
528 528 # reexported from token on 3.7+
529 529 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
530 530 tokens_by_line: List[List[Any]] = [[]]
531 531 if len(lines) > 1 and not lines[0].endswith(("\n", "\r", "\r\n", "\x0b", "\x0c")):
532 532 warnings.warn(
533 533 "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
534 534 stacklevel=2,
535 535 )
536 536 parenlev = 0
537 537 try:
538 538 for token in tokenutil.generate_tokens_catch_errors(
539 539 iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
540 540 ):
541 541 tokens_by_line[-1].append(token)
542 542 if (token.type == NEWLINE) \
543 543 or ((token.type == NL) and (parenlev <= 0)):
544 544 tokens_by_line.append([])
545 545 elif token.string in {'(', '[', '{'}:
546 546 parenlev += 1
547 547 elif token.string in {')', ']', '}'}:
548 548 if parenlev > 0:
549 549 parenlev -= 1
550 550 except tokenize.TokenError:
551 551 # Input ended in a multiline string or expression. That's OK for us.
552 552 pass
553 553
554 554
555 555 if not tokens_by_line[-1]:
556 556 tokens_by_line.pop()
557 557
558 558
559 559 return tokens_by_line
560 560
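# Example (illustrative sketch): physical lines inside brackets are grouped
# into one logical line; the cell below is a made-up input.
#
#   make_tokens_by_line(["a = (1 +\n", "     2)\n", "b = 3\n"])
#   # -> three groups of tokens: one for the bracketed statement spanning the
#   #    first two physical lines, one for "b = 3", and a final group holding
#   #    only the ENDMARKER token.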
561 561
562 562 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
563 563 """Check if the depth of brackets in the list of tokens drops below 0"""
564 564 parenlev = 0
565 565 for token in tokens:
566 566 if token.string in {"(", "[", "{"}:
567 567 parenlev += 1
568 568 elif token.string in {")", "]", "}"}:
569 569 parenlev -= 1
570 570 if parenlev < 0:
571 571 return True
572 572 return False
573 573
574
575 def show_linewise_tokens(s: str):
576 """For investigation and debugging"""
577 warnings.warn(
578 "show_linewise_tokens is deprecated since IPython 8.6",
579 DeprecationWarning,
580 stacklevel=2,
581 )
582 if not s.endswith("\n"):
583 s += "\n"
584 lines = s.splitlines(keepends=True)
585 for line in make_tokens_by_line(lines):
586 print("Line -------")
587 for tokinfo in line:
588 print(" ", tokinfo)
589
590 574 # Arbitrary limit to prevent getting stuck in infinite loops
591 575 TRANSFORM_LOOP_LIMIT = 500
592 576
593 577 class TransformerManager:
594 578 """Applies various transformations to a cell or code block.
595 579
596 580 The key methods for external use are ``transform_cell()``
597 581 and ``check_complete()``.
598 582 """
599 583 def __init__(self):
600 584 self.cleanup_transforms = [
601 585 leading_empty_lines,
602 586 leading_indent,
603 587 classic_prompt,
604 588 ipython_prompt,
605 589 ]
606 590 self.line_transforms = [
607 591 cell_magic,
608 592 ]
609 593 self.token_transformers = [
610 594 MagicAssign,
611 595 SystemAssign,
612 596 EscapedCommand,
613 597 HelpEnd,
614 598 ]
615 599
616 600 def do_one_token_transform(self, lines):
617 601 """Find and run the transform earliest in the code.
618 602
619 603 Returns (changed, lines).
620 604
621 605 This method is called repeatedly until changed is False, indicating
622 606 that all available transformations are complete.
623 607
624 608 The tokens following IPython special syntax might not be valid, so
625 609 the transformed code is retokenised every time to identify the next
626 610 piece of special syntax. Hopefully long code cells are mostly valid
627 611 Python, not using lots of IPython special syntax, so this shouldn't be
628 612 a performance issue.
629 613 """
630 614 tokens_by_line = make_tokens_by_line(lines)
631 615 candidates = []
632 616 for transformer_cls in self.token_transformers:
633 617 transformer = transformer_cls.find(tokens_by_line)
634 618 if transformer:
635 619 candidates.append(transformer)
636 620
637 621 if not candidates:
638 622 # Nothing to transform
639 623 return False, lines
640 624 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
641 625 for transformer in ordered_transformers:
642 626 try:
643 627 return True, transformer.transform(lines)
644 628 except SyntaxError:
645 629 pass
646 630 return False, lines
647 631
648 632 def do_token_transforms(self, lines):
649 633 for _ in range(TRANSFORM_LOOP_LIMIT):
650 634 changed, lines = self.do_one_token_transform(lines)
651 635 if not changed:
652 636 return lines
653 637
654 638 raise RuntimeError("Input transformation still changing after "
655 639 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
656 640
657 641 def transform_cell(self, cell: str) -> str:
658 642 """Transforms a cell of input code"""
659 643 if not cell.endswith('\n'):
660 644 cell += '\n' # Ensure the cell has a trailing newline
661 645 lines = cell.splitlines(keepends=True)
662 646 for transform in self.cleanup_transforms + self.line_transforms:
663 647 lines = transform(lines)
664 648
665 649 lines = self.do_token_transforms(lines)
666 650 return ''.join(lines)
667 651
668 652 def check_complete(self, cell: str):
669 653 """Return whether a block of code is ready to execute, or should be continued
670 654
671 655 Parameters
672 656 ----------
673 657 cell : string
674 658 Python input code, which can be multiline.
675 659
676 660 Returns
677 661 -------
678 662 status : str
679 663 One of 'complete', 'incomplete', or 'invalid' if source is not a
680 664 prefix of valid code.
681 665 indent_spaces : int or None
682 666 The number of spaces by which to indent the next line of code. If
683 667 status is not 'incomplete', this is None.
684 668 """
685 669 # Remember whether the cell ends in a newline.
686 670 ends_with_newline = False
687 671 for character in reversed(cell):
688 672 if character == '\n':
689 673 ends_with_newline = True
690 674 break
691 675 elif character.strip():
692 676 break
693 677 else:
694 678 continue
695 679
696 680 if not ends_with_newline:
697 681 # Append a newline for consistent tokenization
698 682 # See https://bugs.python.org/issue33899
699 683 cell += '\n'
700 684
701 685 lines = cell.splitlines(keepends=True)
702 686
703 687 if not lines:
704 688 return 'complete', None
705 689
706 690 for line in reversed(lines):
707 691 if not line.strip():
708 692 continue
709 693 elif line.strip("\n").endswith("\\"):
710 694 return "incomplete", find_last_indent(lines)
711 695 else:
712 696 break
713 697
714 698 try:
715 699 for transform in self.cleanup_transforms:
716 700 if not getattr(transform, 'has_side_effects', False):
717 701 lines = transform(lines)
718 702 except SyntaxError:
719 703 return 'invalid', None
720 704
721 705 if lines[0].startswith('%%'):
722 706 # Special case for cell magics - completion marked by blank line
723 707 if lines[-1].strip():
724 708 return 'incomplete', find_last_indent(lines)
725 709 else:
726 710 return 'complete', None
727 711
728 712 try:
729 713 for transform in self.line_transforms:
730 714 if not getattr(transform, 'has_side_effects', False):
731 715 lines = transform(lines)
732 716 lines = self.do_token_transforms(lines)
733 717 except SyntaxError:
734 718 return 'invalid', None
735 719
736 720 tokens_by_line = make_tokens_by_line(lines)
737 721
738 722 # Bail if we got one line and there are more closing parentheses than
739 723 # the opening ones
740 724 if (
741 725 len(lines) == 1
742 726 and tokens_by_line
743 727 and has_sunken_brackets(tokens_by_line[0])
744 728 ):
745 729 return "invalid", None
746 730
747 731 if not tokens_by_line:
748 732 return 'incomplete', find_last_indent(lines)
749 733
750 734 if (
751 735 tokens_by_line[-1][-1].type != tokenize.ENDMARKER
752 736 and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
753 737 ):
754 738 # We're in a multiline string or expression
755 739 return 'incomplete', find_last_indent(lines)
756 740
757 741 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
758 742
759 743 # Pop the last line which only contains DEDENTs and ENDMARKER
760 744 last_token_line = None
761 745 if {t.type for t in tokens_by_line[-1]} in [
762 746 {tokenize.DEDENT, tokenize.ENDMARKER},
763 747 {tokenize.ENDMARKER}
764 748 ] and len(tokens_by_line) > 1:
765 749 last_token_line = tokens_by_line.pop()
766 750
767 751 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
768 752 tokens_by_line[-1].pop()
769 753
770 754 if not tokens_by_line[-1]:
771 755 return 'incomplete', find_last_indent(lines)
772 756
773 757 if tokens_by_line[-1][-1].string == ':':
774 758 # The last line starts a block (e.g. 'if foo:')
775 759 ix = 0
776 760 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
777 761 ix += 1
778 762
779 763 indent = tokens_by_line[-1][ix].start[1]
780 764 return 'incomplete', indent + 4
781 765
782 766 if tokens_by_line[-1][0].line.endswith('\\'):
783 767 return 'incomplete', None
784 768
785 769 # At this point, our checks think the code is complete (or invalid).
786 770 # We'll use codeop.compile_command to check this with the real parser
787 771 try:
788 772 with warnings.catch_warnings():
789 773 warnings.simplefilter('error', SyntaxWarning)
790 774 res = compile_command(''.join(lines), symbol='exec')
791 775 except (SyntaxError, OverflowError, ValueError, TypeError,
792 776 MemoryError, SyntaxWarning):
793 777 return 'invalid', None
794 778 else:
795 779 if res is None:
796 780 return 'incomplete', find_last_indent(lines)
797 781
798 782 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
799 783 if ends_with_newline:
800 784 return 'complete', None
801 785 return 'incomplete', find_last_indent(lines)
802 786
803 787 # If there's a blank line at the end, assume we're ready to execute
804 788 if not lines[-1].strip():
805 789 return 'complete', None
806 790
807 791 return 'complete', None
808 792
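# Example (illustrative sketch): typical use of TransformerManager; the cells
# below are invented inputs.
#
#   mgr = TransformerManager()
#   mgr.transform_cell("a = !ls\n")
#   # -> "a = get_ipython().getoutput('ls')\n"
#
#   mgr.check_complete("if True:\n")   # -> ('incomplete', 4)
#   mgr.check_complete("x = 1\n")      # -> ('complete', None)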
809 793
810 794 def find_last_indent(lines):
811 795 m = _indent_re.match(lines[-1])
812 796 if not m:
813 797 return 0
814 798 return len(m.group(0).replace('\t', ' '*4))
815 799
816 800
817 801 class MaybeAsyncCompile(Compile):
818 802 def __init__(self, extra_flags=0):
819 803 super().__init__()
820 804 self.flags |= extra_flags
821 805
822 806
823 807 class MaybeAsyncCommandCompiler(CommandCompiler):
824 808 def __init__(self, extra_flags=0):
825 809 self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
826 810
827 811
828 812 _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
829 813
830 814 compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)