##// END OF EJS Templates
Strip vi-mode style prompts too (singleline)...
Blazej Michalik -
Show More
@@ -1,752 +1,773 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Optional, Any
17 17 import warnings
18 18
# Matches the run of spaces and/or tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Drop blank lines from the start of a block.

    Lines that are empty or consist solely of whitespace are skipped until
    the first line with real content; everything from that line on is
    returned.  If no such line exists (or *lines* is empty) the input is
    returned unchanged.
    """
    if not lines:
        return lines
    first_content = next(
        (idx for idx, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_content is None:
        # Nothing but blank lines: hand the input back untouched.
        return lines
    return lines[first_content:]
33 33
def leading_indent(lines):
    """Strip the first line's indentation from every line that shares it.

    The leading spaces/tabs of the first line are taken as the prefix to
    remove.  Each line that starts with exactly that prefix has it removed;
    lines that do not start with it are left as they are.  If the first line
    is not indented (or *lines* is empty) the input is returned unchanged.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if match is None:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
49 49
class PromptStripper:
    """Callable that removes input prompts from a block of lines.

    Parameters
    ----------
    prompt_re : regular expression
        Compiled pattern matching any input prompt, continuation prompts
        (e.g. ``...``) included.
    initial_re : regular expression, optional
        Compiled pattern matching only the first-line prompt.  When omitted,
        *prompt_re* is used for the first line as well.  Mainly useful for
        plain Python prompts (``>>>``): the continuation prompt ``...`` is a
        valid Python 3 expression and so must not trigger stripping by itself.

    Notes
    -----

    The first line is tested with initial_re, the second line (if any) with
    prompt_re.  If either matches, prompt_re is removed once from the start
    of every line in the block; otherwise the block is returned untouched.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        remove_prompt = self.prompt_re.sub
        return [remove_prompt('', text, count=1) for text in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
86 86
# Stripper for plain Python prompts: ``>>>`` or ``...`` followed by a space or
# end of line.  Only ``>>>`` is accepted as the first-line prompt.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)
91 91
# Stripper for IPython prompts.  The first-line alternative also accepts an
# optional vi editing-mode marker (``[nav] `` / ``[ins] ``) in front of
# ``In [N]: ``; the second alternative matches continuation prompts.
ipython_prompt = PromptStripper(re.compile(
    r'''
    ^(                         # Match from the beginning of a line, either:

                               # 1. First-line prompt:
    ((\[nav\]|\[ins\])?\ )?    # Vi editing mode prompt, if it's there
    In\                        # The 'In' of the prompt, with a space
    \[\d+\]:                   # Command index, as displayed in the prompt
    \                          # With a mandatory trailing space

    |                          # ... or ...

                               # 2. The three dots of the multiline prompt
    \s*                        # All leading whitespace characters
    \.{3,}:                    # The three (or more) dots
    \ ?                        # With an optional trailing space

    )
    ''',
    re.VERBOSE
))
113
93 114
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a single ``run_cell_magic`` call.

    If the first line starts with ``%%``, the whole cell is replaced by one
    line calling ``get_ipython().run_cell_magic(name, first_line_args, body)``.
    Cells that do not start with ``%%``, and ``%%name?`` help requests, are
    returned unchanged.
    """
    if not lines:
        return lines
    header = lines[0]
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = 'get_ipython().run_cell_magic(%r, %r, %r)\n' % (magic_name, first_line, body)
    return [call]
104 125
105 126
106 127 def _find_assign_op(token_line) -> Optional[int]:
107 128 """Get the index of the first assignment in the line ('=' not inside brackets)
108 129
109 130 Note: We don't try to support multiple special assignment (a = b = %foo)
110 131 """
111 132 paren_level = 0
112 133 for i, ti in enumerate(token_line):
113 134 s = ti.string
114 135 if s == '=' and paren_level == 0:
115 136 return i
116 137 if s in {'(','[','{'}:
117 138 paren_level += 1
118 139 elif s in {')', ']', '}'}:
119 140 if paren_level > 0:
120 141 paren_level -= 1
121 142 return None
122 143
def find_end_of_continued_line(lines, start_line: int):
    """Return the index of the last line of a backslash-continued statement.

    Starting at *start_line*, advance while the current line ends with a
    backslash immediately followed by a newline.  If every remaining line is
    continued, the returned index is ``len(lines)``.

    Uses 0-indexed line numbers.
    """
    last = start_line
    while True:
        if not lines[last].endswith('\\\n'):
            return last
        last += 1
        if last >= len(lines):
            # Ran off the end while still inside a continuation.
            return last
134 155
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued statement into one string.

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    The first piece starts at column ``start[1]`` of line ``start[0]``; the
    remaining pieces are the following lines up to and including *end_line*.
    Every piece but the last is right-stripped and loses its final character
    (the backslash); the last piece is only right-stripped.  Pieces are joined
    with a single space.

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_row, first_col = start[0], start[1]
    pieces = [lines[first_row][first_col:]]
    pieces.extend(lines[first_row + 1:end_line + 1])
    *continued, final = pieces
    joined = [piece.rstrip()[:-1] for piece in continued]  # Strip backslash+newline
    joined.append(final.rstrip())  # Strip newline from last line
    return ' '.join(joined)
161 182
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax must be left alone when it appears inside strings or
    comments, which is hard to guarantee with regexes.  Instead the code is
    tokenised as Python and the special syntax is recognised in the tokens.

    Because IPython's special syntax is not valid Python, tokenising may go
    wrong after the point where it starts.  Each transformer therefore finds
    and rewrites *one* occurrence at a time; the tokens are regenerated after
    every rewrite to locate the next occurrence.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute decides between transformers that match at the
    same position.  Lower numbers have higher priority.  This allows
    "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        """Sort key: (start line, start column, priority)."""
        return (self.start_line, self.start_col, self.priority)

    def __init__(self, start):
        """Store the match position; *start* is a 1-indexed (line, col) pair."""
        one_based_line = start[0]
        self.start_line = one_based_line - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        grouped into logical lines so that e.g. the first token of each line
        is easy to inspect.

        Implementations should return an instance of their class pointing at
        the start position found, or None if there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines and returns a
        similar list of transformed lines.
        """
        raise NotImplementedError
212 233
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None when no line
        has a top-level ``=`` directly followed by ``%`` and a NAME token.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Both line[assign_ix+1] and line[assign_ix+2] are inspected, so
            # the line must hold at least assign_ix + 3 tokens.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text from the ``%`` up to the end of the (possibly
        backslash-continued) statement is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call; the left-hand side
        before the ``%`` is kept as is.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
243 264
244 265
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        A line qualifies when it has a top-level ``=``, its source text does
        not itself begin with ``=``, and the token after the ``=`` starts a
        run of ERRORTOKENs that reaches ``!`` with only whitespace before it.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if line[assign_ix].line.strip().startswith('='):
                continue
            if len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize.ERRORTOKEN:
                continue

            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The ``!command`` text (including backslash continuations) is replaced
        by ``get_ipython().getoutput(command)``; the text before ``!`` is kept.
        """
        row, col = self.start_line, self.start_col

        lhs = lines[row][:col]
        end_line = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        replacement = lhs + "get_ipython().getoutput({!r})".format(cmd) + '\n'
        return lines[:row] + [replacement] + lines[end_line + 1:]
283 304
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# Single-character and two-character escape prefixes recognised at the start
# of a line.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
302 323
303 324 def _make_help_call(target, esc, next_input=None):
304 325 """Prepares a pinfo(2)/psearch call from a target name and the escape
305 326 (i.e. ? or ??)"""
306 327 method = 'pinfo2' if esc == '??' \
307 328 else 'psearch' if '*' in target \
308 329 else 'pinfo'
309 330 arg = " ".join([method, target])
310 331 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
311 332 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
312 333 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
313 334 if next_input is None:
314 335 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
315 336 else:
316 337 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
317 338 (next_input, t_magic_name, t_magic_arg_s)
318 339
def _tr_help(content):
    """Translate lines escaped with: ?

    With no content after the escape, the generated call is
    ``get_ipython().show_usage()``; otherwise a help call for *content* is
    built with the ``?`` escape.
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
328 349
def _tr_help2(content):
    """Translate lines escaped with: ??

    With no content after the escape, the generated call is
    ``get_ipython().show_usage()``; otherwise a help call for *content* is
    built with the ``??`` escape.
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
338 359
339 360 def _tr_magic(content):
340 361 "Translate lines escaped with a percent sign: %"
341 362 name, _, args = content.partition(' ')
342 363 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
343 364
344 365 def _tr_quote(content):
345 366 "Translate lines escaped with a comma: ,"
346 367 name, _, args = content.partition(' ')
347 368 return '%s("%s")' % (name, '", "'.join(args.split()) )
348 369
349 370 def _tr_quote2(content):
350 371 "Translate lines escaped with a semicolon: ;"
351 372 name, _, args = content.partition(' ')
352 373 return '%s("%s")' % (name, args)
353 374
354 375 def _tr_paren(content):
355 376 "Translate lines escaped with a slash: /"
356 377 name, _, args = content.partition(' ')
357 378 return '%s(%s)' % (name, ", ".join(args.split()))
358 379
# Dispatch table: escape prefix -> callable that receives the text following
# the prefix and returns the replacement Python source.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
367 388
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Only the first token of each logical line that is not INDENT/DEDENT
        is examined; a match is reported when its text is one of the
        single-character escapes.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            first = next((tok for tok in line if tok.type not in layout_types), None)
            if first is None:
                # Empty line, or nothing but INDENT/DEDENT tokens.
                continue
            if first.string in ESCAPE_SINGLES:
                return cls(first.start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped statement (with backslash continuations folded in) is
        replaced by the output of the handler registered for its escape in
        ``tr``; an escape with no handler yields an empty call.  Leading
        indentation is preserved.
        """
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        end_line = find_end_of_continued_line(lines, row)
        line = assemble_continued_line(lines, (row, col), end_line)

        # Two-character escapes take precedence over single-character ones.
        width = 2 if (len(line) > 1 and line[:2] in ESCAPE_DOUBLES) else 1
        escape, content = line[:width], line[width:]

        handler = tr.get(escape)
        call = handler(content) if handler is not None else ''

        return lines[:row] + [indent + call + '\n'] + lines[end_line + 1:]
410 431
# Matches a help request at the end of the text: group 1 is the (optionally
# %-prefixed, dotted, possibly wildcarded) name, group 3 is the ``?``/``??``.
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+            # Variable name
                              (\.(?!\d)[\w*]+)*       # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
418 439
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """Record the statement start and the 1-indexed position of the ``?``."""
        super().__init__(start)
        q_line_one_based = q_locn[0]
        self.q_line = q_line_one_based - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            question = line[-2]
            # Find the first token that's not INDENT/DEDENT
            first = next(tok for tok in line if tok.type not in layout_types)
            return cls(first.start, question.start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text does not end in a recognisable
        ``name?`` / ``name??`` request.
        """
        first_row, last_row = self.start_line, self.q_line
        piece = ''.join(lines[first_row:last_row + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target, esc = m.group(1), m.group(3)

        lines_before = lines[:first_row]
        lines_after = lines[last_row + 1:]

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        is_only_statement = (not lines_before) and (not lines_after)
        if is_only_statement and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        return lines_before + [indent + call + '\n'] + lines_after
468 489
def make_tokens_by_line(lines:List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last lines should keep their line ending ('\\n',
    '\\r\\n') for this to properly work. Use `.splitlines(keepends=True)`
    for example when passing block of text to this function.

    A warning is issued when more than one line is given and the first one has
    no line ending.  If tokenizing stops with ``tokenize.TokenError`` (input
    ended inside a multiline string or expression), the tokens collected so
    far are returned.

    Returns a list of lists of tokens, one inner list per logical line; a
    trailing empty group is dropped.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    tokens_by_line: List[List[Any]] = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                # End of a logical line: start collecting the next group.
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line
510 531
511 532
def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
    """Check if the depth of brackets in the list of tokens drops below 0

    Bracket kinds are not distinguished: any opening bracket raises the depth
    and any closing bracket lowers it.
    """
    openers = {"(", "[", "{"}
    closers = {")", "]", "}"}
    depth = 0
    for tok in tokens:
        text = tok.string
        if text in openers:
            depth += 1
            continue
        if text in closers:
            depth -= 1
            if depth < 0:
                return True
    return False
523 544
524 545
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s*, grouped by logical line.  A trailing newline is
    added to *s* first if it lacks one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
534 555
# Arbitrary limit to prevent getting stuck in infinite loops: the maximum
# number of single token transformations applied to one cell before giving up.
TRANSFORM_LOOP_LIMIT = 500
537 558
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.

    Three lists of transformations are held, applied in this order by
    ``transform_cell()``: ``cleanup_transforms`` and ``line_transforms``
    (callables taking and returning a list of lines), then
    ``token_transformers`` (classes providing ``find`` and ``transform``).
    """
    def __init__(self):
        # Line-based clean-up applied first: blank lines, indentation, prompts.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Line-based transforms applied after clean-up.
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transformer classes, applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        # At most one candidate per transformer class.
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest position first; priority breaks ties at the same position.
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transformations until nothing changes.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT iterations.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        # Scan backwards: trailing whitespace other than '\n' is skipped, the
        # first '\n' or non-whitespace character decides.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                # Transforms flagged with has_side_effects are skipped here.
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Drop trailing NEWLINE/COMMENT/ENDMARKER tokens so the last
        # significant token of the final line can be inspected.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            # Indent one level (4 spaces) past the column of the block opener.
            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                # Turn SyntaxWarning into an exception so it is caught below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        # NOTE(review): this branch and the fall-through below return the same
        # value, so the check currently has no effect.
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
746 767
747 768
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    leading = match.group(0)
    return len(leading.replace('\t', ' ' * 4))
General Comments 0
You need to be logged in to leave comments. Login now